skill-quality: detect and fix SKILL.md overspecification (#1266)

christso · claude · web-flow · commit 0e1b9cda3e6c · 2026-05-28T01:06:16.000+10:00
* skill-quality: add overspecification anti-pattern + coverage-contract guidance

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* test(studio): fix tests broken by default-to-projects-dashboard change

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(lint): resolve pre-existing biome format/type errors

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* skill-quality: remove verbose observed-case example from checklist

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/apps/cli/src/commands/results/remote.ts b/apps/cli/src/commands/results/remote.ts
@@ -4,6 +4,7 @@ import path from 'node:path';
 import {
   DEFAULT_THRESHOLD,
   type EvaluationResult,
+  type GitListedRun,
   type ResultsConfig,
   type ResultsRepoStatus,
   directPushResults,
@@ -24,11 +25,10 @@ import {
   listResultFilesFromRunsDir,
 } from '../inspect/utils.js';
 
-
 // ── In-memory TTL cache for listGitRuns ────────────────────────────
 // Avoids repeated expensive git ls-tree + git cat-file --batch operations
 // on every API request. Cache key is repoDir, TTL is 60 seconds.
-const gitRunsCache = new Map<string, { data: any; expiresAt: number }>();
+const gitRunsCache = new Map<string, { data: Promise<GitListedRun[]>; expiresAt: number }>();
 const GIT_RUNS_CACHE_TTL_MS = 60_000;
 
 function cachedListGitRuns(repoDir: string) {
@@ -40,12 +40,14 @@ function cachedListGitRuns(repoDir: string) {
   const promise = listGitRuns(repoDir);
   gitRunsCache.set(repoDir, { data: promise, expiresAt: now + GIT_RUNS_CACHE_TTL_MS });
   // Evict stale entry once the promise settles so a fresh fetch replaces it
-  promise.catch(() => {}).finally(() => {
-    const entry = gitRunsCache.get(repoDir);
-    if (entry && entry.expiresAt <= Date.now()) {
-      gitRunsCache.delete(repoDir);
-    }
-  });
+  promise
+    .catch(() => {})
+    .finally(() => {
+      const entry = gitRunsCache.get(repoDir);
+      if (entry && entry.expiresAt <= Date.now()) {
+        gitRunsCache.delete(repoDir);
+      }
+    });
   return promise;
 }
 
diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
@@ -1633,7 +1633,9 @@ export const resultsServeCommand = command({
     // Clone or pull any project entries that declare a source.
     // Non-blocking: fire-and-forget so startup is instant even when some
     // project paths are missing or slow (e.g. /tmp paths that timeout).
-    syncProjects(registry.projects).catch((err) => console.error("Background project sync failed:", err));
+    syncProjects(registry.projects).catch((err) =>
+      console.error('Background project sync failed:', err),
+    );
 
     try {
       let results: EvaluationResult[] = [];
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
@@ -183,9 +183,9 @@ describe('loadResults', () => {
 // ── resolveDashboardMode ───────────────────────────────────────────────
 
 describe('resolveDashboardMode', () => {
-  it('defaults to single-project mode when no projects are registered', () => {
+  it('defaults to project dashboard mode when no projects are registered', () => {
     expect(resolveDashboardMode(0, {})).toEqual({
-      projectDashboard: false,
+      projectDashboard: true,
     });
   });
 
diff --git a/apps/cli/test/unit/studio-navigation.test.ts b/apps/cli/test/unit/studio-navigation.test.ts
@@ -13,8 +13,8 @@ import {
 } from '../../../studio/src/lib/navigation.ts';
 
 describe('studio navigation helpers', () => {
-  it('redirects the root entrypoint to the only registered project', () => {
-    expect(resolveIndexRoute(['demo-project'], undefined, 'analytics')).toEqual({
+  it('redirects when the preferred project id matches a registered project', () => {
+    expect(resolveIndexRoute(['demo-project'], undefined, 'demo-project', 'analytics')).toEqual({
       kind: 'redirect',
       redirectPath: '/projects/demo-project?tab=analytics',
     });
diff --git a/apps/studio/src/components/EvalDetail.tsx b/apps/studio/src/components/EvalDetail.tsx
@@ -287,11 +287,7 @@ function FilesTab({
   return (
     <div className="relative flex h-full min-h-[400px] gap-4">
       {/* FileTree panel — desktop: side-by-side, mobile: full-width slide-over */}
-      <div
-        className={`${
-          mobileShowTree ? 'block' : 'hidden'
-        } md:block w-full md:w-auto`}
-      >
+      <div className={`${mobileShowTree ? 'block' : 'hidden'} md:block w-full md:w-auto`}>
         <FileTree
           files={files}
           selectedPath={effectivePath}
@@ -304,11 +300,7 @@ function FilesTab({
       </div>
 
       {/* MonacoViewer panel — desktop: side-by-side, mobile: full-width */}
-      <div
-        className={`${
-          !mobileShowTree ? 'block' : 'hidden'
-        } md:block flex-1 h-full`}
-      >
+      <div className={`${!mobileShowTree ? 'block' : 'hidden'} md:block flex-1 h-full`}>
         <MonacoViewer value={displayValue} language={displayLanguage} height="100%" />
       </div>
 
diff --git a/apps/studio/src/components/StopRunButton.tsx b/apps/studio/src/components/StopRunButton.tsx
@@ -59,7 +59,10 @@ export function StopRunButton({ runId, status, isReadOnly, projectId }: StopRunB
           'Stopping…'
         ) : (
           <>
-            <span aria-hidden="true" className="inline-block h-2.5 w-2.5 rounded-[1px] bg-current" />
+            <span
+              aria-hidden="true"
+              className="inline-block h-2.5 w-2.5 rounded-[1px] bg-current"
+            />
             Stop
           </>
         )}
diff --git a/apps/studio/src/routes/index.tsx b/apps/studio/src/routes/index.tsx
@@ -28,10 +28,10 @@ import {
   useStudioConfig,
 } from '~/lib/api';
 import {
+  type StudioTabId,
   initialProjectRedirectStorageKey,
   resolveIndexRoute,
   resolveInitialProjectRedirect,
-  type StudioTabId,
 } from '~/lib/navigation';
 import type { RunMeta } from '~/lib/types';
 type TabId = StudioTabId;
diff --git a/plugins/agentic-engineering/skills/agent-plugin-review/references/skill-quality-checklist.md b/plugins/agentic-engineering/skills/agent-plugin-review/references/skill-quality-checklist.md
@@ -56,6 +56,14 @@ description: Use when tests have race conditions, timing dependencies, or pass/f
 - [ ] Move heavy reference (100+ lines) to separate files
 - [ ] Use cross-references instead of repeating content from other skills
 - [ ] Compress examples — one excellent example beats many mediocre ones
+- [ ] When SKILL.md exceeds ~500 words for a standard skill, the heaviest section is almost always inlined reference material — extract it
+
+### Coverage Contracts vs. Rule Restatement
+
+When a skill author wants to enforce that the agent doesn't skip rules, the temptation is to inline each rule with its full rationale. Don't.
+
+- **Coverage contract pattern:** Keep one-line checklist items in SKILL.md naming each rule and citing the reference file (e.g., `"Lifecycle choice — apply large-table rule in references/schema-rules.md"`). Add one sentence: "Silence on any item is itself a review gap." Close the silent-skip loophole with: "If a reference file is unavailable, say so explicitly rather than skipping it."
+- **Anti-pattern:** Multi-paragraph items that restate rules and rationale already in `references/`. The fix is structural — the prose is in the wrong file, not the wrong shape. Move operational procedures (how to locate files, `find` syntax, what to record) and output-format meta (citation discipline worked examples) into `references/`. Mark that file as always-load.
 
 ### Structure
 
@@ -115,6 +123,8 @@ Match specificity to the task's fragility:
 | Version printing instructions | Fragile, rely on git history |
 | Hardcoded local paths | Machine-specific, not portable |
 | Description summarizes workflow | the agent follows description, skips SKILL.md body |
+| SKILL.md inlines rule prose that also lives in `references/` | Two sources of truth — the inline copy drifts from the canonical reference; agent applies the SKILL.md version and ignores the more detailed reference |
+| SKILL.md embeds operational procedures or worked-example pairs | Procedures (how to locate files, `find` syntax, what to record) and output-format meta (citation discipline examples) belong in `references/` per progressive disclosure |
 
 ## Discipline-Enforcing Skills (Additional Checks)
 
@@ -125,3 +135,5 @@ For skills that enforce rules (TDD, verification, coding standards):
 - [ ] Red flags list for self-checking
 - [ ] "Spirit vs letter" addressed: "Violating the letter IS violating the spirit"
 - [ ] Hard gates at critical decision points
+- [ ] Discipline patterns (output-format meta, citation examples, verification procedures) live in `references/` — SKILL.md names them in one line and cites the file
+- [ ] Discipline reference file is marked as always-load so the agent cannot bypass it (don't inline to guarantee coverage — mark as unmissable instead)