Skip to content

Commit c44a9b7

Browse files
committed
style: fix biome formatting
1 parent 008527f commit c44a9b7

5 files changed

Lines changed: 74 additions & 80 deletions

File tree

src/entrypoints/cli/commands/evaluate.ts

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -429,9 +429,7 @@ export async function evaluate(
429429
case 'layout': {
430430
const strategyId = args[1]
431431
if (!strategyId) {
432-
console.error(
433-
'Usage: evaluate layout <strategy-id> [--out-dir <path>]',
434-
)
432+
console.error('Usage: evaluate layout <strategy-id> [--out-dir <path>]')
435433
return 1
436434
}
437435

@@ -466,11 +464,8 @@ export async function evaluate(
466464
return 1
467465
}
468466

469-
const {
470-
layoutQualityKind,
471-
setLayoutJudge,
472-
createBedrockLayoutJudge,
473-
} = await import('../../../services/evaluation')
467+
const { layoutQualityKind, setLayoutJudge, createBedrockLayoutJudge } =
468+
await import('../../../services/evaluation')
474469
const { OPUS_MODEL_ID } = await import('../../../services/extraction')
475470

476471
const judge = createBedrockLayoutJudge(OPUS_MODEL_ID)

src/services/evaluation/index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,21 @@ export {
1111
export { runEvaluation } from './harness'
1212
// Judges
1313
export { createBedrockFieldJudge } from './judge'
14-
export { createBedrockLayoutJudge } from './layout-judge'
1514
// Kinds
1615
export {
17-
layoutQualityKind,
18-
setLayoutJudge,
1916
type LayoutJudge,
2017
type LayoutJudgeResponse,
2118
type LayoutQualityOutput,
19+
layoutQualityKind,
20+
setLayoutJudge,
2221
} from './kinds/layout-quality'
2322
export {
2423
type ExtractionOutput,
2524
pdfFieldExtractionKind,
2625
} from './kinds/pdf-field-extraction'
2726
export { createLlmJudgeKind } from './kinds/pdf-field-extraction-judge'
2827
export { shapingCommandsKind } from './kinds/shaping-commands'
28+
export { createBedrockLayoutJudge } from './layout-judge'
2929
// Layout judge prompt
3030
export { buildLayoutJudgePrompt } from './layout-judge-prompt'
3131
export { evaluationRunSchema } from './schemas'

src/services/evaluation/kinds/layout-quality.ts

Lines changed: 64 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -29,78 +29,76 @@ const DIMENSIONS = [
2929

3030
let currentJudge: LayoutJudge | undefined
3131

32-
export const layoutQualityKind: EvaluationKind<
33-
LayoutQualityOutput,
34-
undefined
35-
> = {
36-
id: 'layout-quality',
37-
description:
38-
'Evaluates FormSpec layout quality using LLM-as-judge against a civic tech best practices rubric',
39-
40-
async score(output: LayoutQualityOutput): Promise<CaseMetrics> {
41-
if (!currentJudge) {
42-
throw new Error(
43-
'layoutQualityKind: judge not set. Call setLayoutJudge() before scoring.',
44-
)
45-
}
46-
47-
const response = await currentJudge.judge(output.spec, output.formSpec)
48-
49-
const metrics: Record<string, number> = {}
50-
let total = 0
51-
let count = 0
52-
53-
for (const dim of DIMENSIONS) {
54-
const entry = response.scores[dim]
55-
if (entry) {
56-
const normalized = (entry.score - 1) / 4 // 1-5 → 0-1
57-
metrics[dim] = normalized
58-
total += normalized
59-
count++
32+
export const layoutQualityKind: EvaluationKind<LayoutQualityOutput, undefined> =
33+
{
34+
id: 'layout-quality',
35+
description:
36+
'Evaluates FormSpec layout quality using LLM-as-judge against a civic tech best practices rubric',
37+
38+
async score(output: LayoutQualityOutput): Promise<CaseMetrics> {
39+
if (!currentJudge) {
40+
throw new Error(
41+
'layoutQualityKind: judge not set. Call setLayoutJudge() before scoring.',
42+
)
6043
}
61-
}
62-
63-
metrics.overall = count > 0 ? total / count : 0
64-
65-
return {
66-
fixture: '',
67-
metrics,
68-
details: {
69-
rawScores: response.scores,
70-
pageCount: output.formSpec.pages.length,
71-
fieldCount: output.spec.groups.reduce(
72-
(sum, g) => sum + g.requirements.length,
73-
0,
74-
),
75-
groupCount: output.spec.groups.length,
76-
},
77-
}
78-
},
79-
80-
summarize(cases: CaseMetrics[]): SummaryMetrics {
81-
if (cases.length === 0) return { metrics: {} }
82-
83-
const metricKeys = new Set<string>()
84-
for (const c of cases) {
85-
for (const key of Object.keys(c.metrics)) metricKeys.add(key)
86-
}
87-
88-
const metrics: Record<string, number> = {}
89-
for (const key of metricKeys) {
90-
let sum = 0
44+
45+
const response = await currentJudge.judge(output.spec, output.formSpec)
46+
47+
const metrics: Record<string, number> = {}
48+
let total = 0
9149
let count = 0
92-
for (const c of cases) {
93-
if (key in c.metrics) {
94-
sum += c.metrics[key]
50+
51+
for (const dim of DIMENSIONS) {
52+
const entry = response.scores[dim]
53+
if (entry) {
54+
const normalized = (entry.score - 1) / 4 // 1-5 → 0-1
55+
metrics[dim] = normalized
56+
total += normalized
9557
count++
9658
}
9759
}
98-
metrics[key] = count > 0 ? sum / count : 0
99-
}
10060

101-
return { metrics }
102-
},
103-
}
61+
metrics.overall = count > 0 ? total / count : 0
62+
63+
return {
64+
fixture: '',
65+
metrics,
66+
details: {
67+
rawScores: response.scores,
68+
pageCount: output.formSpec.pages.length,
69+
fieldCount: output.spec.groups.reduce(
70+
(sum, g) => sum + g.requirements.length,
71+
0,
72+
),
73+
groupCount: output.spec.groups.length,
74+
},
75+
}
76+
},
77+
78+
summarize(cases: CaseMetrics[]): SummaryMetrics {
79+
if (cases.length === 0) return { metrics: {} }
80+
81+
const metricKeys = new Set<string>()
82+
for (const c of cases) {
83+
for (const key of Object.keys(c.metrics)) metricKeys.add(key)
84+
}
85+
86+
const metrics: Record<string, number> = {}
87+
for (const key of metricKeys) {
88+
let sum = 0
89+
let count = 0
90+
for (const c of cases) {
91+
if (key in c.metrics) {
92+
sum += c.metrics[key]
93+
count++
94+
}
95+
}
96+
metrics[key] = count > 0 ? sum / count : 0
97+
}
98+
99+
return { metrics }
100+
},
101+
}
104102

105103
export function setLayoutJudge(judge: LayoutJudge): void {
106104
currentJudge = judge

src/services/extraction/registry.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ export function createExtractorRegistry(
128128
'Hybrid extraction prompt with layout-aware FormSpec generation. Step 2 uses civic tech best practices (GOV.UK, USDS, Code for America) for adaptive page sizing, topic cohesion, and progressive disclosure.',
129129
status: 'experimental',
130130
courseTopics: ['evaluation', 'prompt-optimization', 'form-design'],
131-
catalogPath: '/catalog/experiments/layout-quality/sonnet-hybrid-layout-v1',
131+
catalogPath:
132+
'/catalog/experiments/layout-quality/sonnet-hybrid-layout-v1',
132133
modelId: SONNET_MODEL_ID,
133134
pricing: { inputPer1k: 0.003, outputPer1k: 0.015 },
134135
},

test/evaluation/layout-quality.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import { describe, expect, test } from 'bun:test'
2+
import type { DataCollectionSpec } from '../../src/services/data-collection'
23
import {
3-
layoutQualityKind,
44
type LayoutJudge,
5+
layoutQualityKind,
56
setLayoutJudge,
67
} from '../../src/services/evaluation/kinds/layout-quality'
7-
import type { DataCollectionSpec } from '../../src/services/data-collection'
88
import type { FormSpec } from '../../src/services/forms'
99

1010
describe('layoutQualityKind', () => {

0 commit comments

Comments
 (0)