Skip to content

Commit fa8fe36

Browse files
committed
fix(form-documents): include createdAt/updatedAt in layout prompt schema
The formSpecSchema requires these fields. Also commits baseline and layout variant evaluation results showing a +17.7 percentage-point overall improvement (overall score 0.573 → 0.750).
1 parent 515d1bb commit fa8fe36

3 files changed

Lines changed: 385 additions & 1 deletion

File tree

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
{
2+
"kind": "layout-quality",
3+
"implementation": "sonnet-hybrid-layout-v1",
4+
"specVersion": "2026-05-06",
5+
"status": "current",
6+
"timestamp": "2026-05-06T07:49:47.601Z",
7+
"model": "Claude Sonnet 4 (hybrid + layout)",
8+
"summary": {
9+
"pageSizing": 0.8125,
10+
"topicCohesion": 0.875,
11+
"logicalProgression": 0.875,
12+
"conditionalUse": 0.375,
13+
"titleClarity": 1,
14+
"deliveryModeChoice": 0.5625,
15+
"overall": 0.75
16+
},
17+
"cases": [
18+
{
19+
"fixture": "pardon-application",
20+
"metrics": {
21+
"pageSizing": 0.5,
22+
"topicCohesion": 0.75,
23+
"logicalProgression": 0.75,
24+
"conditionalUse": 0.25,
25+
"titleClarity": 1,
26+
"deliveryModeChoice": 0.5,
27+
"overall": 0.625
28+
},
29+
"details": {
30+
"rawScores": {
31+
"pageSizing": {
32+
"score": 3,
33+
"rationale": "Page 1 has 32 fields which is quite large for a single page, while pages like page 4 and page 7 have only 1-5 fields; the distribution is uneven though splitting page 1 further could be warranted."
34+
},
35+
"topicCohesion": {
36+
"score": 4,
37+
"rationale": "Most pages group related topics well, though page 5 combines sobriety/substance use with financial information which are somewhat distinct sensitive topics, and page 2 combines residence history with job history."
38+
},
39+
"logicalProgression": {
40+
"score": 4,
41+
"rationale": "The flow from personal info → living/work history → education/military → community → health/finances → criminal history → reasons → references → certification is logical, though placing reasons for pardon after criminal history rather than before is slightly unusual."
42+
},
43+
"conditionalUse": {
44+
"score": 2,
45+
"rationale": "The DataCollectionSpec has clear conditional fields (military service details conditional on serving, substance use details conditional on having struggled, previous application dates conditional on having applied before) but no page-level conditions are used anywhere."
46+
},
47+
"titleClarity": {
48+
"score": 5,
49+
"rationale": "All page titles are plain-language, conversational, and clearly communicate what the user will be asked (e.g., 'Tell us about yourself', 'Why you're seeking a pardon', 'Sign and submit your application')."
50+
},
51+
"deliveryModeChoice": {
52+
"score": 3,
53+
"rationale": "Page 5 (substance use and finances) appropriately uses conversational mode for sensitive topics, and page 3 uses hybrid for conditional military content, but page 6 (criminal history with complex narrative fields about conduct and responsibility) being static is suboptimal, and the reasons-for-pardon page could also benefit from conversational delivery."
54+
}
55+
},
56+
"pageCount": 9,
57+
"fieldCount": 128,
58+
"groupCount": 13
59+
}
60+
},
61+
{
62+
"fixture": "i-9",
63+
"metrics": {
64+
"pageSizing": 0.75,
65+
"topicCohesion": 1,
66+
"logicalProgression": 0.75,
67+
"conditionalUse": 0.25,
68+
"titleClarity": 1,
69+
"deliveryModeChoice": 0.5,
70+
"overall": 0.7083333333333334
71+
},
72+
"details": {
73+
"rawScores": {
74+
"pageSizing": {
75+
"score": 4,
76+
"rationale": "Page 1 has 20 fields and page 2 has 20 fields which is on the larger side but acceptable for a government form; pages 3 and 4 have 9-12 fields which is well-sized."
77+
},
78+
"topicCohesion": {
79+
"score": 5,
80+
"rationale": "Each page maps directly to one logical group from the I-9 form structure, maintaining perfect topic cohesion within each page."
81+
},
82+
"logicalProgression": {
83+
"score": 4,
84+
"rationale": "The flow follows the official I-9 section order logically, though placing the preparer/translator section after employer verification rather than after employee information slightly deviates from the actual form completion sequence."
85+
},
86+
"conditionalUse": {
87+
"score": 2,
88+
"rationale": "Pages 3 and 4 are clearly conditional (only needed if a preparer assisted or for reverification/rehire) but have no page-level conditions defined; additionally, immigration-related fields in page 1 could benefit from conditional logic based on citizenship status."
89+
},
90+
"titleClarity": {
91+
"score": 5,
92+
"rationale": "Titles like 'Tell us about yourself,' 'Employer document review,' and 'Preparer or translator assistance' are plain-language, descriptive, and help users immediately understand each page's purpose."
93+
},
94+
"deliveryModeChoice": {
95+
"score": 3,
96+
"rationale": "Page 1 appropriately uses hybrid mode given its mix of simple identity fields and complex citizenship/immigration attestation, but pages 3 and 4 could benefit from conversational mode since they involve conditional logic about whether they apply at all."
97+
}
98+
},
99+
"pageCount": 4,
100+
"fieldCount": 61,
101+
"groupCount": 4
102+
}
103+
},
104+
{
105+
"fixture": "w-9",
106+
"metrics": {
107+
"pageSizing": 1,
108+
"topicCohesion": 0.75,
109+
"logicalProgression": 1,
110+
"conditionalUse": 0.5,
111+
"titleClarity": 1,
112+
"deliveryModeChoice": 0.5,
113+
"overall": 0.7916666666666666
114+
},
115+
"details": {
116+
"rawScores": {
117+
"pageSizing": {
118+
"score": 5,
119+
"rationale": "19 fields across 4 pages yields an average of ~5 fields per page, which is well-balanced for this form's complexity."
120+
},
121+
"topicCohesion": {
122+
"score": 4,
123+
"rationale": "Most pages have clear topical focus, though combining entity-information and exemptions on page 1 mixes two distinct groups—albeit related enough to work together."
124+
},
125+
"logicalProgression": {
126+
"score": 5,
127+
"rationale": "The flow from entity identification to address to TIN to certification/signature follows the natural W-9 order and moves from easier to more sensitive information."
128+
},
129+
"conditionalUse": {
130+
"score": 3,
131+
"rationale": "The LLC tax classification field is conditional on selecting LLC, and the foreign partners indicator applies only to certain entity types, yet no page-level conditions are used to handle these cases."
132+
},
133+
"titleClarity": {
134+
"score": 5,
135+
"rationale": "All page titles are plain-language, action-oriented, and clearly communicate what information the user will provide on each page."
136+
},
137+
"deliveryModeChoice": {
138+
"score": 3,
139+
"rationale": "Page 1 uses hybrid which is reasonable given the conditional LLC classification, but pages 3 and 4 could benefit from conversational mode since TIN entry requires choosing between SSN/EIN and certification involves understanding legal statements."
140+
}
141+
},
142+
"pageCount": 4,
143+
"fieldCount": 19,
144+
"groupCount": 5
145+
}
146+
},
147+
{
148+
"fixture": "snap-wisconsin",
149+
"metrics": {
150+
"pageSizing": 1,
151+
"topicCohesion": 1,
152+
"logicalProgression": 1,
153+
"conditionalUse": 0.5,
154+
"titleClarity": 1,
155+
"deliveryModeChoice": 0.75,
156+
"overall": 0.875
157+
},
158+
"details": {
159+
"rawScores": {
160+
"pageSizing": {
161+
"score": 5,
162+
"rationale": "Each page has 6-9 fields, which is appropriate for a 43-field form spread across 6 pages—neither too dense nor over-paginated."
163+
},
164+
"topicCohesion": {
165+
"score": 5,
166+
"rationale": "Each page maps directly to one cohesive data group (personal info, household, income, assets, expenses, signature), maintaining clear topical focus."
167+
},
168+
"logicalProgression": {
169+
"score": 5,
170+
"rationale": "The flow moves naturally from identity → household → income → assets → expenses → certification/signature, following standard benefits application logic and building from simple to sensitive."
171+
},
172+
"conditionalUse": {
173+
"score": 3,
174+
"rationale": "The DataCollectionSpec has optional household members and conditional-like fields (e.g., self-employment, authorized representative) that could benefit from page-level conditions, but none are used."
175+
},
176+
"titleClarity": {
177+
"score": 5,
178+
"rationale": "All titles use plain, friendly language ('Tell us about yourself', 'Your monthly expenses') that clearly communicates what the user will be asked on each page."
179+
},
180+
"deliveryModeChoice": {
181+
"score": 4,
182+
"rationale": "Income and expenses are appropriately set to hybrid given their conditional complexity, and the signature page uses conversational mode for guidance, though assets could also benefit from hybrid mode given vehicle/property conditionality."
183+
}
184+
},
185+
"pageCount": 6,
186+
"fieldCount": 43,
187+
"groupCount": 6
188+
}
189+
}
190+
]
191+
}
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
{
2+
"kind": "layout-quality",
3+
"implementation": "sonnet-hybrid-v1",
4+
"specVersion": "2026-05-06",
5+
"status": "current",
6+
"timestamp": "2026-05-06T07:41:31.821Z",
7+
"model": "Claude Sonnet 4 (hybrid prompt)",
8+
"summary": {
9+
"pageSizing": 0.5,
10+
"topicCohesion": 0.5,
11+
"logicalProgression": 0.75,
12+
"conditionalUse": 0.375,
13+
"titleClarity": 0.5625,
14+
"deliveryModeChoice": 0.75,
15+
"overall": 0.5729166666666666
16+
},
17+
"cases": [
18+
{
19+
"fixture": "pardon-application",
20+
"metrics": {
21+
"pageSizing": 0.5,
22+
"topicCohesion": 0.5,
23+
"logicalProgression": 0.75,
24+
"conditionalUse": 0.25,
25+
"titleClarity": 0.75,
26+
"deliveryModeChoice": 0.75,
27+
"overall": 0.5833333333333334
28+
},
29+
"details": {
30+
"rawScores": {
31+
"pageSizing": {
32+
"score": 3,
33+
"rationale": "Page 1 has 32 fields which is quite large for a single page, while pages like page 8 (10 fields) and page 6 (16 fields) are more appropriately sized; the background-information group probably should have been split across multiple pages."
34+
},
35+
"topicCohesion": {
36+
"score": 3,
37+
"rationale": "Some pages combine loosely related topics (e.g., substance use with finances on page 5, education with residence on page 3) that don't share a natural thematic connection, though pages 6, 7, and 8 are well-focused."
38+
},
39+
"logicalProgression": {
40+
"score": 4,
41+
"rationale": "The flow is generally logical—starting with personal info, moving through life circumstances, then to criminal history, and ending with legal certifications—though placing reasons for pardon before case details feels slightly premature."
42+
},
43+
"conditionalUse": {
44+
"score": 2,
45+
"rationale": "Several groups have clear conditional fields (military service depends on 'have you served,' substance use depends on 'have you struggled,' previous application details depend on 'yes' answer) but no page-level conditions are used to skip irrelevant sections."
46+
},
47+
"titleClarity": {
48+
"score": 4,
49+
"rationale": "Titles are mostly plain-language and descriptive (e.g., 'Criminal History and Case Details,' 'Letters of Support'), though 'Health and Financial Information' is slightly misleading since it's about substance use rather than general health."
50+
},
51+
"deliveryModeChoice": {
52+
"score": 4,
53+
"rationale": "Good choices overall—conversational mode for sensitive/complex sections (substance use, criminal history, personal info) and static for certifications and references—though page 1's 32-field background section being conversational could be overwhelming."
54+
}
55+
},
56+
"pageCount": 8,
57+
"fieldCount": 128,
58+
"groupCount": 13
59+
}
60+
},
61+
{
62+
"fixture": "i-9",
63+
"metrics": {
64+
"pageSizing": 0.5,
65+
"topicCohesion": 0.5,
66+
"logicalProgression": 0.75,
67+
"conditionalUse": 0.25,
68+
"titleClarity": 0.5,
69+
"deliveryModeChoice": 0.75,
70+
"overall": 0.5416666666666666
71+
},
72+
"details": {
73+
"rawScores": {
74+
"pageSizing": {
75+
"score": 3,
76+
"rationale": "Page 1 has 20 fields and page 2 has 20 fields which are manageable, but page 3 combines two distinct groups totaling 21 fields which is heavy and could benefit from separation."
77+
},
78+
"topicCohesion": {
79+
"score": 3,
80+
"rationale": "Pages 1 and 2 each address a single clear topic, but page 3 combines the unrelated preparer/translator certification with the reverification/rehire section, reducing cohesion."
81+
},
82+
"logicalProgression": {
83+
"score": 4,
84+
"rationale": "The flow from employee information to employer verification to supplemental sections follows the actual I-9 form structure logically, though the final page mixes two distinct purposes."
85+
},
86+
"conditionalUse": {
87+
"score": 2,
88+
"rationale": "The spec has several conditional fields (USCIS A-Number, I-94, foreign passport, work authorization expiration depending on citizenship status; preparer/translator only if used; reverification only if applicable) but no page-level conditions are used."
89+
},
90+
"titleClarity": {
91+
"score": 3,
92+
"rationale": "Pages 1 and 2 have clear descriptive titles, but 'Additional Certifications' is vague and doesn't clearly communicate that it covers both preparer/translator info and reverification/rehire."
93+
},
94+
"deliveryModeChoice": {
95+
"score": 4,
96+
"rationale": "Using conversational mode for the employee section with its conditional citizenship/immigration questions is appropriate, hybrid for employer document review makes sense, though static for the conditional preparer/reverification section is slightly suboptimal."
97+
}
98+
},
99+
"pageCount": 3,
100+
"fieldCount": 61,
101+
"groupCount": 4
102+
}
103+
},
104+
{
105+
"fixture": "w-9",
106+
"metrics": {
107+
"pageSizing": 0.75,
108+
"topicCohesion": 0.5,
109+
"logicalProgression": 0.75,
110+
"conditionalUse": 0.5,
111+
"titleClarity": 0.5,
112+
"deliveryModeChoice": 0.75,
113+
"overall": 0.625
114+
},
115+
"details": {
116+
"rawScores": {
117+
"pageSizing": {
118+
"score": 4,
119+
"rationale": "Three pages for 19 fields is reasonable, though page 3 has 8 fields (TIN + certification) which is on the heavier side but still manageable."
120+
},
121+
"topicCohesion": {
122+
"score": 3,
123+
"rationale": "Page 1 combines entity info with tax classification (related), but page 2 mixes address with exemptions (less related), and page 3 combines TIN with certification (somewhat related but distinct concerns)."
124+
},
125+
"logicalProgression": {
126+
"score": 4,
127+
"rationale": "The flow from identity/classification to address/exemptions to TIN/certification follows a natural progression ending with the most sensitive and legally binding elements."
128+
},
129+
"conditionalUse": {
130+
"score": 3,
131+
"rationale": "The LLC tax classification field is conditional on selecting LLC, and the foreign partners indicator applies only to certain entity types, yet no page-level conditions are used to handle these scenarios."
132+
},
133+
"titleClarity": {
134+
"score": 3,
135+
"rationale": "Titles like 'Entity and Classification Information' and 'Address and Exemptions' are descriptive but somewhat jargon-heavy; plainer language like 'About You' or 'Your Address' would be more user-friendly."
136+
},
137+
"deliveryModeChoice": {
138+
"score": 4,
139+
"rationale": "Conversational mode for the tax classification page (which has conditional logic around LLC type) is appropriate, static for straightforward address fields is correct, and hybrid for the certification page with both simple TIN entry and complex legal attestations is a reasonable choice."
140+
}
141+
},
142+
"pageCount": 3,
143+
"fieldCount": 19,
144+
"groupCount": 6
145+
}
146+
},
147+
{
148+
"fixture": "snap-wisconsin",
149+
"metrics": {
150+
"pageSizing": 0.25,
151+
"topicCohesion": 0.5,
152+
"logicalProgression": 0.75,
153+
"conditionalUse": 0.5,
154+
"titleClarity": 0.5,
155+
"deliveryModeChoice": 0.75,
156+
"overall": 0.5416666666666666
157+
},
158+
"details": {
159+
"rawScores": {
160+
"pageSizing": {
161+
"score": 2,
162+
"rationale": "Each page contains 13-17 fields, which is quite dense for a single view—especially page 1 with 17 fields spanning personal info and household members—making the form feel overwhelming."
163+
},
164+
"topicCohesion": {
165+
"score": 3,
166+
"rationale": "Each page combines two related-but-distinct groups (e.g., applicant info + household, income + assets, expenses + signature), which dilutes topical focus even though the pairings are somewhat logical."
167+
},
168+
"logicalProgression": {
169+
"score": 4,
170+
"rationale": "The flow from personal information to financial details to expenses and certification follows a natural and expected order for a benefits application."
171+
},
172+
"conditionalUse": {
173+
"score": 3,
174+
"rationale": "The household composition fields and some conditional elements (authorized representative, self-employment) could benefit from page-level conditions, but none are used."
175+
},
176+
"titleClarity": {
177+
"score": 3,
178+
"rationale": "Titles like 'Personal Information' and 'Income and Resources' are clear but generic, and 'Expenses and Certification' awkwardly combines two distinct concepts in one title."
179+
},
180+
"deliveryModeChoice": {
181+
"score": 4,
182+
"rationale": "Using conversational mode for income/resources (which can be complex and variable) is a good choice, and hybrid for the mixed pages is reasonable, though the signature page might benefit from static delivery."
183+
}
184+
},
185+
"pageCount": 3,
186+
"fieldCount": 43,
187+
"groupCount": 6
188+
}
189+
}
190+
]
191+
}

src/services/form-documents/layout-prompt.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ Return ONLY valid JSON (no markdown fences, no explanation) matching this schema
7676
"groups": ["group-id-1", "group-id-2"],
7777
"deliveryMode": "static | conversational | hybrid"
7878
}
79-
]
79+
],
80+
"createdAt": "${new Date().toISOString()}",
81+
"updatedAt": "${new Date().toISOString()}"
8082
}
8183
8284
Each page's "groups" array references group IDs from the DataCollectionSpec. Every group must appear in exactly one page.

0 commit comments

Comments
 (0)