Skip to content

Commit db9cb4d

Browse files
committed
refactor(evaluation): use factory pattern for layout quality kind
Replace the module-level mutable state (the currentJudge variable, assigned via setLayoutJudge) with a factory function (createLayoutQualityKind) that takes the judge as a parameter. This is consistent with the existing createLlmJudgeKind pattern.
1 parent e8b0228 commit db9cb4d

4 files changed

Lines changed: 37 additions & 62 deletions

File tree

src/entrypoints/cli/commands/evaluate.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -464,12 +464,12 @@ export async function evaluate(
464464
return 1
465465
}
466466

467-
const { layoutQualityKind, setLayoutJudge, createBedrockLayoutJudge } =
467+
const { createLayoutQualityKind, createBedrockLayoutJudge } =
468468
await import('../../../services/evaluation')
469469
const { OPUS_MODEL_ID } = await import('../../../services/extraction')
470470

471471
const judge = createBedrockLayoutJudge(OPUS_MODEL_ID)
472-
setLayoutJudge(judge)
472+
const layoutQualityKind = createLayoutQualityKind(judge)
473473

474474
const cacheDbPath = process.env.CACHE_DB_PATH ?? 'data/cache.sqlite'
475475
mkdirSync('data', { recursive: true })

src/services/evaluation/index.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ export { runEvaluation } from './harness'
1313
export { createBedrockFieldJudge } from './judge'
1414
// Kinds
1515
export {
16+
createLayoutQualityKind,
1617
type LayoutJudge,
1718
type LayoutJudgeResponse,
1819
type LayoutQualityOutput,
19-
layoutQualityKind,
20-
setLayoutJudge,
2120
} from './kinds/layout-quality'
2221
export {
2322
type ExtractionOutput,

src/services/evaluation/kinds/layout-quality.ts

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,22 @@ const DIMENSIONS = [
2727
'deliveryModeChoice',
2828
] as const
2929

30-
let currentJudge: LayoutJudge | undefined
31-
32-
export const layoutQualityKind: EvaluationKind<LayoutQualityOutput, undefined> =
33-
{
30+
/**
31+
* Create a layout quality evaluation kind with the given judge.
32+
*
33+
* Follows the same factory pattern as `createLlmJudgeKind` —
34+
* the judge is injected at construction, not via mutable state.
35+
*/
36+
export function createLayoutQualityKind(
37+
judge: LayoutJudge,
38+
): EvaluationKind<LayoutQualityOutput, undefined> {
39+
return {
3440
id: 'layout-quality',
3541
description:
3642
'Evaluates FormSpec layout quality using LLM-as-judge against a civic tech best practices rubric',
3743

3844
async score(output: LayoutQualityOutput): Promise<CaseMetrics> {
39-
if (!currentJudge) {
40-
throw new Error(
41-
'layoutQualityKind: judge not set. Call setLayoutJudge() before scoring.',
42-
)
43-
}
44-
45-
const response = await currentJudge.judge(output.spec, output.formSpec)
45+
const response = await judge.judge(output.spec, output.formSpec)
4646

4747
const metrics: Record<string, number> = {}
4848
let total = 0
@@ -99,7 +99,4 @@ export const layoutQualityKind: EvaluationKind<LayoutQualityOutput, undefined> =
9999
return { metrics }
100100
},
101101
}
102-
103-
export function setLayoutJudge(judge: LayoutJudge): void {
104-
currentJudge = judge
105102
}

test/evaluation/layout-quality.test.ts

Lines changed: 23 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,32 @@
11
import { describe, expect, test } from 'bun:test'
22
import type { DataCollectionSpec } from '../../src/services/data-collection'
33
import {
4+
createLayoutQualityKind,
45
type LayoutJudge,
5-
layoutQualityKind,
6-
setLayoutJudge,
76
} from '../../src/services/evaluation/kinds/layout-quality'
87
import type { FormSpec } from '../../src/services/forms'
98

10-
describe('layoutQualityKind', () => {
9+
describe('createLayoutQualityKind', () => {
10+
const mockJudge: LayoutJudge = {
11+
async judge() {
12+
return {
13+
scores: {
14+
pageSizing: { score: 5, rationale: 'Perfect' },
15+
topicCohesion: { score: 3, rationale: 'Acceptable' },
16+
logicalProgression: { score: 4, rationale: 'Good' },
17+
conditionalUse: { score: 5, rationale: 'N/A' },
18+
titleClarity: { score: 1, rationale: 'Poor' },
19+
deliveryModeChoice: { score: 3, rationale: 'OK' },
20+
},
21+
}
22+
},
23+
}
24+
25+
const kind = createLayoutQualityKind(mockJudge)
26+
1127
test('has correct id and description', () => {
12-
expect(layoutQualityKind.id).toBe('layout-quality')
13-
expect(layoutQualityKind.description).toContain('layout')
28+
expect(kind.id).toBe('layout-quality')
29+
expect(kind.description).toContain('layout')
1430
})
1531

1632
test('summarize averages metrics across cases', () => {
@@ -43,30 +59,14 @@ describe('layoutQualityKind', () => {
4359
},
4460
]
4561

46-
const summary = layoutQualityKind.summarize(cases)
62+
const summary = kind.summarize(cases)
4763

4864
expect(summary.metrics.pageSizing).toBeCloseTo(0.7)
4965
expect(summary.metrics.topicCohesion).toBeCloseTo(0.8)
5066
expect(summary.metrics.overall).toBeCloseTo(0.715)
5167
})
5268

5369
test('score calls judge and normalizes 1-5 to 0-1', async () => {
54-
const mockJudge: LayoutJudge = {
55-
async judge() {
56-
return {
57-
scores: {
58-
pageSizing: { score: 5, rationale: 'Perfect' },
59-
topicCohesion: { score: 3, rationale: 'Acceptable' },
60-
logicalProgression: { score: 4, rationale: 'Good' },
61-
conditionalUse: { score: 5, rationale: 'N/A' },
62-
titleClarity: { score: 1, rationale: 'Poor' },
63-
deliveryModeChoice: { score: 3, rationale: 'OK' },
64-
},
65-
}
66-
},
67-
}
68-
setLayoutJudge(mockJudge)
69-
7070
const spec: DataCollectionSpec = {
7171
id: 'test',
7272
title: 'Test',
@@ -94,7 +94,7 @@ describe('layoutQualityKind', () => {
9494
pages: [{ id: 'page-1', title: 'Page 1', groups: ['g1'] }],
9595
}
9696

97-
const result = await layoutQualityKind.score({ spec, formSpec }, undefined)
97+
const result = await kind.score({ spec, formSpec }, undefined)
9898

9999
// 5 -> 1.0, 3 -> 0.5, 4 -> 0.75, 5 -> 1.0, 1 -> 0.0, 3 -> 0.5
100100
expect(result.metrics.pageSizing).toBeCloseTo(1.0)
@@ -106,25 +106,4 @@ describe('layoutQualityKind', () => {
106106
// overall = (1.0 + 0.5 + 0.75 + 1.0 + 0.0 + 0.5) / 6 = 0.625
107107
expect(result.metrics.overall).toBeCloseTo(0.625)
108108
})
109-
110-
test('score throws if judge not set', async () => {
111-
setLayoutJudge(undefined as unknown as LayoutJudge)
112-
113-
const spec: DataCollectionSpec = {
114-
id: 'x',
115-
title: 'X',
116-
description: '',
117-
groups: [],
118-
}
119-
const formSpec: FormSpec = {
120-
id: 'form-x',
121-
specId: 'x',
122-
title: 'X',
123-
pages: [],
124-
}
125-
126-
expect(
127-
layoutQualityKind.score({ spec, formSpec }, undefined),
128-
).rejects.toThrow()
129-
})
130109
})

0 commit comments

Comments
 (0)