refactor(evaluation): use factory pattern for layout quality kind

danielnaab · danielnaab · commit db9cb4de33ba · 2026-05-07T05:25:43.000Z
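Replace module-level mutable state (currentJudge, set via setLayoutJudge)
with a factory function (createLayoutQualityKind) that takes the judge as
a parameter. This is consistent with the existing createLlmJudgeKind
pattern. Because the judge is now a required argument, the runtime
"judge not set" check and its test are removed.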
diff --git a/src/entrypoints/cli/commands/evaluate.ts b/src/entrypoints/cli/commands/evaluate.ts
@@ -464,12 +464,12 @@ export async function evaluate(
         return 1
       }
 
-      const { layoutQualityKind, setLayoutJudge, createBedrockLayoutJudge } =
+      const { createLayoutQualityKind, createBedrockLayoutJudge } =
         await import('../../../services/evaluation')
       const { OPUS_MODEL_ID } = await import('../../../services/extraction')
 
       const judge = createBedrockLayoutJudge(OPUS_MODEL_ID)
-      setLayoutJudge(judge)
+      const layoutQualityKind = createLayoutQualityKind(judge)
 
       const cacheDbPath = process.env.CACHE_DB_PATH ?? 'data/cache.sqlite'
       mkdirSync('data', { recursive: true })
diff --git a/src/services/evaluation/index.ts b/src/services/evaluation/index.ts
@@ -13,11 +13,10 @@ export { runEvaluation } from './harness'
 export { createBedrockFieldJudge } from './judge'
 // Kinds
 export {
+  createLayoutQualityKind,
   type LayoutJudge,
   type LayoutJudgeResponse,
   type LayoutQualityOutput,
-  layoutQualityKind,
-  setLayoutJudge,
 } from './kinds/layout-quality'
 export {
   type ExtractionOutput,
diff --git a/src/services/evaluation/kinds/layout-quality.ts b/src/services/evaluation/kinds/layout-quality.ts
@@ -27,22 +27,22 @@ const DIMENSIONS = [
   'deliveryModeChoice',
 ] as const
 
-let currentJudge: LayoutJudge | undefined
-
-export const layoutQualityKind: EvaluationKind<LayoutQualityOutput, undefined> =
-  {
+/**
+ * Create a layout quality evaluation kind with the given judge.
+ *
+ * Follows the same factory pattern as `createLlmJudgeKind` —
+ * the judge is injected at construction, not via mutable state.
+ */
+export function createLayoutQualityKind(
+  judge: LayoutJudge,
+): EvaluationKind<LayoutQualityOutput, undefined> {
+  return {
     id: 'layout-quality',
     description:
       'Evaluates FormSpec layout quality using LLM-as-judge against a civic tech best practices rubric',
 
     async score(output: LayoutQualityOutput): Promise<CaseMetrics> {
-      if (!currentJudge) {
-        throw new Error(
-          'layoutQualityKind: judge not set. Call setLayoutJudge() before scoring.',
-        )
-      }
-
-      const response = await currentJudge.judge(output.spec, output.formSpec)
+      const response = await judge.judge(output.spec, output.formSpec)
 
       const metrics: Record<string, number> = {}
       let total = 0
@@ -99,7 +99,4 @@ export const layoutQualityKind: EvaluationKind<LayoutQualityOutput, undefined> =
       return { metrics }
     },
   }
-
-export function setLayoutJudge(judge: LayoutJudge): void {
-  currentJudge = judge
 }
diff --git a/test/evaluation/layout-quality.test.ts b/test/evaluation/layout-quality.test.ts
@@ -1,16 +1,32 @@
 import { describe, expect, test } from 'bun:test'
 import type { DataCollectionSpec } from '../../src/services/data-collection'
 import {
+  createLayoutQualityKind,
   type LayoutJudge,
-  layoutQualityKind,
-  setLayoutJudge,
 } from '../../src/services/evaluation/kinds/layout-quality'
 import type { FormSpec } from '../../src/services/forms'
 
-describe('layoutQualityKind', () => {
+describe('createLayoutQualityKind', () => {
+  const mockJudge: LayoutJudge = {
+    async judge() {
+      return {
+        scores: {
+          pageSizing: { score: 5, rationale: 'Perfect' },
+          topicCohesion: { score: 3, rationale: 'Acceptable' },
+          logicalProgression: { score: 4, rationale: 'Good' },
+          conditionalUse: { score: 5, rationale: 'N/A' },
+          titleClarity: { score: 1, rationale: 'Poor' },
+          deliveryModeChoice: { score: 3, rationale: 'OK' },
+        },
+      }
+    },
+  }
+
+  const kind = createLayoutQualityKind(mockJudge)
+
   test('has correct id and description', () => {
-    expect(layoutQualityKind.id).toBe('layout-quality')
-    expect(layoutQualityKind.description).toContain('layout')
+    expect(kind.id).toBe('layout-quality')
+    expect(kind.description).toContain('layout')
   })
 
   test('summarize averages metrics across cases', () => {
@@ -43,30 +59,14 @@ describe('layoutQualityKind', () => {
       },
     ]
 
-    const summary = layoutQualityKind.summarize(cases)
+    const summary = kind.summarize(cases)
 
     expect(summary.metrics.pageSizing).toBeCloseTo(0.7)
     expect(summary.metrics.topicCohesion).toBeCloseTo(0.8)
     expect(summary.metrics.overall).toBeCloseTo(0.715)
   })
 
   test('score calls judge and normalizes 1-5 to 0-1', async () => {
-    const mockJudge: LayoutJudge = {
-      async judge() {
-        return {
-          scores: {
-            pageSizing: { score: 5, rationale: 'Perfect' },
-            topicCohesion: { score: 3, rationale: 'Acceptable' },
-            logicalProgression: { score: 4, rationale: 'Good' },
-            conditionalUse: { score: 5, rationale: 'N/A' },
-            titleClarity: { score: 1, rationale: 'Poor' },
-            deliveryModeChoice: { score: 3, rationale: 'OK' },
-          },
-        }
-      },
-    }
-    setLayoutJudge(mockJudge)
-
     const spec: DataCollectionSpec = {
       id: 'test',
       title: 'Test',
@@ -94,7 +94,7 @@ describe('layoutQualityKind', () => {
       pages: [{ id: 'page-1', title: 'Page 1', groups: ['g1'] }],
     }
 
-    const result = await layoutQualityKind.score({ spec, formSpec }, undefined)
+    const result = await kind.score({ spec, formSpec }, undefined)
 
     // 5 -> 1.0, 3 -> 0.5, 4 -> 0.75, 5 -> 1.0, 1 -> 0.0, 3 -> 0.5
     expect(result.metrics.pageSizing).toBeCloseTo(1.0)
@@ -106,25 +106,4 @@ describe('layoutQualityKind', () => {
     // overall = (1.0 + 0.5 + 0.75 + 1.0 + 0.0 + 0.5) / 6 = 0.625
     expect(result.metrics.overall).toBeCloseTo(0.625)
   })
-
-  test('score throws if judge not set', async () => {
-    setLayoutJudge(undefined as unknown as LayoutJudge)
-
-    const spec: DataCollectionSpec = {
-      id: 'x',
-      title: 'X',
-      description: '',
-      groups: [],
-    }
-    const formSpec: FormSpec = {
-      id: 'form-x',
-      specId: 'x',
-      title: 'X',
-      pages: [],
-    }
-
-    expect(
-      layoutQualityKind.score({ spec, formSpec }, undefined),
-    ).rejects.toThrow()
-  })
 })