Skip to content

Commit e3eaed9

Browse files
authored
feat: add public DOCX evidence contracts and source anchors for layout (#3000)
* feat: add public DOCX evidence contracts and source anchors for layout * chore: update lock file * fix: invalidate paint cache for source anchor changes
1 parent baca2c6 commit e3eaed9

38 files changed

Lines changed: 1706 additions & 14 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,4 @@ packages/sdk/tools/*.json
109109

110110
.playwright-cli/
111111
.prqrc.json
112+
tmp/
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# DOCX Evidence Contracts
2+
3+
Worker-safe public artifact contracts for DOCX render evidence.
4+
5+
This package is deliberately limited to the JSON handshake SuperDoc can emit and
6+
other systems can read:
7+
8+
- document, fragment, render-subject, run, and artifact identities
9+
- source refs and source anchors
10+
- minimal comparison observations
11+
- minimal signature and cluster records
12+
- deterministic stable ID helpers
13+
- Zod validators for the public shapes
14+
15+
This package must stay free of runtime implementation and product policy. Do not
16+
add report generation, analysis heuristics, persistence workflows, reduction
17+
workflows, internal feature maps, Labs service internals, SuperDoc renderer
18+
internals, filesystem APIs, process APIs, artifact-store clients, or network
19+
clients.
20+
21+
Richer DOCX analysis contracts and implementation details belong in private
22+
internal packages. SuperDoc should only publish the narrow evidence shapes needed
23+
for interoperability.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"name": "@superdoc/docx-evidence-contracts",
3+
"version": "0.1.0",
4+
"description": "Worker-safe public DOCX evidence artifact contracts.",
5+
"type": "module",
6+
"private": true,
7+
"main": "./dist/index.js",
8+
"types": "./dist/index.d.ts",
9+
"exports": {
10+
".": {
11+
"types": "./dist/index.d.ts",
12+
"source": "./src/index.ts",
13+
"default": "./dist/index.js"
14+
}
15+
},
16+
"files": [
17+
"dist",
18+
"README.md"
19+
],
20+
"scripts": {
21+
"build": "tsc --project tsconfig.json",
22+
"test": "vitest run"
23+
},
24+
"dependencies": {
25+
"zod": "^4.3.6"
26+
},
27+
"devDependencies": {
28+
"vitest": "catalog:"
29+
}
30+
}
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import { describe, expect, it } from 'vitest';
2+
import { readdirSync, readFileSync } from 'node:fs';
3+
import path from 'node:path';
4+
import { fileURLToPath } from 'node:url';
5+
import {
6+
clusterRecordSchema,
7+
comparisonObservationSchema,
8+
createStableId,
9+
parseClusterRecord,
10+
parseComparisonObservation,
11+
parseSignatureRecord,
12+
renderSubjectSchema,
13+
sourceAnchorSchema,
14+
signatureRecordSchema,
15+
} from './index.js';
16+
17+
const packageRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
18+
const sourceRoot = path.join(packageRoot, 'src');
19+
20+
/** Sample document identity embedded in the `observation` fixture. */
const sourceDocument = {
  sourceKey: 'corpus/basic/table.docx',
  originalSha256: 'sha256-original',
  normalizedSha256: 'sha256-normalized',
};
25+
26+
/**
 * Sample source anchor for the first table in `/word/document.xml`.
 *
 * The same occurrence and raw-fact ids appear both at the top level and inside
 * `sourceRef`, so the anchor and its source reference agree with each other.
 */
const sourceAnchor = {
  sourceNodeId: 'node-table-1',
  occurrenceId: 'occurrence-table-1',
  rawFactIds: ['raw-w-tbl-1'],
  schemaQNames: [
    {
      qName: 'w:tbl',
      namespaceUri: 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
      localName: 'tbl',
    },
  ],
  featureKey: 'tables',
  conceptKey: 'docx.table',
  sourceRef: {
    partUri: '/word/document.xml',
    xpathLikePath: '/w:document[1]/w:body[1]/w:tbl[1]',
    rawFactId: 'raw-w-tbl-1',
    occurrenceId: 'occurrence-table-1',
  },
  anchorConfidence: 'high',
  flowBlockId: 'flow-table-1',
};
48+
49+
/**
 * Sample document-level, source-linked observation describing a table width
 * change. It embeds the `sourceDocument` and `sourceAnchor` fixtures.
 */
const observation = {
  observationId: 'observation_1',
  schemaVersion: 1,
  evidenceLevel: 'document',
  evidenceStrength: 'source-linked',
  mechanism: 'layout-json',
  category: 'table',
  sourceDocument,
  sourcePath: 'basic/table.docx.layout.json',
  sourceOccurrenceId: 'occurrence-table-1',
  sourceAnchors: [sourceAnchor],
  pageNumbers: [1],
  jsonPath: '$.pages[0].blocks[3].width',
  normalizedPath: '$.pages[].blocks[].width',
  pathKind: 'table-width',
  diffKind: 'changed',
  deltaBucket: '+1px',
  rawDiffCount: 4,
  summary: 'Table width changed by about 1px.',
  metrics: { deltaPx: 1 },
  artifactRefs: [{ path: 'results/layout/basic/table.docx.layout.json.diff.json' }],
};
71+
72+
/**
 * Sample signature record that groups `observation_1` under the
 * `table-width` family.
 */
const signature = {
  signatureId: 'signature_table_width_1',
  signatureVersion: 'public.v1',
  familyId: 'table-width',
  observationIds: ['observation_1'],
  category: 'table',
  mechanism: 'layout-json',
  normalizedKey: 'table-width|changed|+1px',
  familyKey: 'table-width|changed',
  pathKind: 'table-width',
  normalizedPath: '$.pages[].blocks[].width',
  diffKind: 'changed',
  deltaBucket: '+1px',
  instanceCount: 1,
  documentCount: 1,
  pageCount: 1,
  exampleObservationId: 'observation_1',
  confidence: 'high',
};
91+
92+
/**
 * Sample cluster record containing the single `signature_table_width_1`
 * signature and one flattened instance entry for `observation_1`.
 */
const cluster = {
  clusterId: 'cluster_table_width_1',
  signatureIds: ['signature_table_width_1'],
  title: 'Table width changed by about 1px',
  instanceCount: 1,
  documentCount: 1,
  pageCount: 1,
  representativeObservationIds: ['observation_1'],
  evidenceStrength: 'source-linked',
  status: 'new',
  category: 'table',
  mechanism: 'layout-json',
  pathKind: 'table-width',
  allObservationIds: ['observation_1'],
  allInstances: [
    {
      observationId: 'observation_1',
      signatureId: 'signature_table_width_1',
      documentPath: 'basic/table.docx.layout.json',
      sourcePath: 'basic/table.docx.layout.json',
      sourceOccurrenceId: 'occurrence-table-1',
      sourceNodeIds: ['node-table-1'],
      // Unlike the anchor fixture, instances carry qualified names as plain strings.
      schemaQNames: ['w:tbl'],
      pageNumbers: [1],
      jsonPath: '$.pages[0].blocks[3].width',
      normalizedPath: '$.pages[].blocks[].width',
      pathKind: 'table-width',
      summary: 'Table width changed by about 1px.',
    },
  ],
};
123+
124+
/**
 * Returns a shallow copy of a parsed observation with `observationId` removed.
 *
 * The copy is what the stable-ID test hashes, so the ID being derived is not
 * part of its own input. The argument itself is left untouched.
 */
function stableObservationInput(value: ReturnType<typeof parseComparisonObservation>): unknown {
  const withoutId: Record<string, unknown> = { ...value };
  delete withoutId.observationId;
  return withoutId;
}
128+
129+
/**
 * Recursively collects the paths of non-test TypeScript files under `directory`.
 *
 * Sub-directories are walked depth-first in the order `readdirSync` returns
 * them. A regular file is kept when its name ends in `.ts` but not in
 * `.test.ts`; every other entry is ignored.
 *
 * @param directory - Directory to scan.
 * @returns Paths built by joining `directory` with each matching entry name.
 */
function listSourceFiles(directory: string): string[] {
  const collected: string[] = [];
  for (const entry of readdirSync(directory, { withFileTypes: true })) {
    const entryPath = path.join(directory, entry.name);
    if (entry.isDirectory()) {
      collected.push(...listSourceFiles(entryPath));
      continue;
    }
    const isSource = entry.isFile() && entry.name.endsWith('.ts') && !entry.name.endsWith('.test.ts');
    if (isSource) collected.push(entryPath);
  }
  return collected;
}
136+
137+
/**
 * Contract tests for the public DOCX evidence package.
 *
 * Covers JSON round-tripping of the observation, signature and cluster
 * fixtures, render-subject parsing, stable-ID determinism, rejection of
 * fragment-level observations that lack fragment identity, and a static scan
 * that keeps forbidden imports out of the package source.
 */
describe('public DOCX evidence contracts', () => {
  it('validates minimal source-linked observations', () => {
    const parsed = parseComparisonObservation(observation);
    const reparsed = comparisonObservationSchema.parse(JSON.parse(JSON.stringify(parsed)));

    expect(reparsed).toEqual(parsed);
    expect(sourceAnchorSchema.parse(parsed.sourceAnchors?.[0])).toEqual(sourceAnchor);
  });

  it('validates minimal signature and cluster records', () => {
    const parsedSignature = parseSignatureRecord(signature);
    const parsedCluster = parseClusterRecord(cluster);

    expect(signatureRecordSchema.parse(JSON.parse(JSON.stringify(parsedSignature)))).toEqual(parsedSignature);
    expect(clusterRecordSchema.parse(JSON.parse(JSON.stringify(parsedCluster)))).toEqual(parsedCluster);
  });

  it('validates render subjects without exposing analysis policy', () => {
    const parsed = renderSubjectSchema.parse({
      subjectId: 'subject_candidate',
      role: 'superdoc-candidate',
      rendererId: 'superdoc',
      rendererVersion: '1.30.0-next.8',
      evidenceLevel: 'document',
      artifactRefs: [{ path: 'candidate/layout.json' }],
    });

    expect(parsed.role).toBe('superdoc-candidate');
  });

  it('produces stable IDs from public artifacts', () => {
    const parsed = parseComparisonObservation(observation);
    const first = createStableId('observation', stableObservationInput(parsed));
    const second = createStableId('observation', stableObservationInput(parsed));

    expect(first).toBe(second);
    expect(first.startsWith('observation_')).toBe(true);
  });

  it('rejects fragment observations without fragment identity', () => {
    expect(
      comparisonObservationSchema.safeParse({
        ...observation,
        evidenceLevel: 'fragment',
      }).success,
    ).toBe(false);
  });

  it('keeps public source Worker-safe and free of owner-runtime imports', () => {
    // A module specifier can follow `from`, a bare side-effect `import`, a
    // dynamic `import(`, or `require(`. Matching only `from '…'` would let the
    // other three forms through unnoticed.
    const forbiddenImports = [
      /(?:from\s*|\bimport\s*\(?\s*|\brequire\s*\(\s*)['"]node:/,
      /(?:from\s*|\bimport\s*\(?\s*|\brequire\s*\(\s*)['"].*\.\.\/\.\.\/\.\.\/labs/,
      /(?:from\s*|\bimport\s*\(?\s*|\brequire\s*\(\s*)['"]@superdoc\/(super-editor|painter-dom|layout-engine|pm-adapter)/,
    ];

    const sourceFiles = listSourceFiles(sourceRoot);
    // Without this guard a wrong `sourceRoot` would make the loop pass vacuously.
    expect(sourceFiles.length).toBeGreaterThan(0);

    for (const sourceFile of sourceFiles) {
      const text = readFileSync(sourceFile, 'utf8');

      for (const pattern of forbiddenImports) {
        // The file path is passed as the assertion message so a failure names the offender.
        expect(text, sourceFile).not.toMatch(pattern);
      }
    }
  });
});
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import type {
2+
EvidenceLevel,
3+
ObservationMechanism,
4+
RenderSubjectRole,
5+
SourceConfidence,
6+
StoryKind,
7+
} from './vocabulary.js';
8+
9+
/**
 * Reference to an artifact.
 *
 * Every field is optional at the type level, so a reference may carry only a
 * `path` (for example `candidate/layout.json`) or any other combination.
 */
export interface ArtifactRef {
  bucket?: string;
  key?: string;
  path?: string;
  sha256?: string;
}
15+
16+
/**
 * Reference to a position inside one part of a source document.
 *
 * `partUri` and `xpathLikePath` are required; the remaining fields are
 * optional refinements.
 */
export interface SourceRef {
  /** Part being referenced, e.g. `/word/document.xml`. */
  partUri: string;
  /** Path within that part, e.g. `/w:document[1]/w:body[1]/w:tbl[1]`. */
  xpathLikePath: string;
  line?: number;
  column?: number;
  rawFactId?: string;
  occurrenceId?: string;
}
24+
25+
/**
 * Identity of a source document.
 *
 * Only `originalSha256` is required; every other field is optional.
 */
export interface DocumentIdentity {
  /** Optional key for the document, e.g. `corpus/basic/table.docx`. */
  sourceKey?: string;
  sourceRelativePath?: string;
  originalSha256: string;
  normalizedSha256?: string;
  sourceDocRev?: string;
  documentRunId?: string;
}
33+
34+
/**
 * Identity of a normalized form of a source document.
 *
 * Pairs the originating `sourceDocument` with a required `normalizedSha256`.
 * The run id and the kind of normalization are optional.
 */
export interface NormalizedSourceIdentity {
  sourceDocument: DocumentIdentity;
  normalizedSha256: string;
  normalizationRunId?: string;
  normalizationKind?:
    | 'superdoc-cleanup'
    | 'ooxml-canonicalization'
    | 'fragment-derivation'
    | 'other';
}
40+
41+
/**
 * Identity of a fragment tied to a parent document.
 *
 * The parent document, fragment run id, fragment path, fragment hash and story
 * kind are all required. The reference back into the parent source and the
 * reliability artifact reference are optional.
 */
export interface FragmentIdentity {
  parentDocument: DocumentIdentity;
  fragmentRunId: string;
  fragmentPath: string;
  fragmentSha256: string;
  storyKind: StoryKind;
  parentSourceRef?: SourceRef;
  reliabilityRef?: ArtifactRef;
}
50+
51+
/**
 * Identifies a render subject.
 *
 * `role` and `rendererId` are required (for example role `superdoc-candidate`
 * with renderer id `superdoc`). Version, runtime, platform and SuperDoc build
 * details are optional.
 */
export interface RenderSubjectIdentity {
  role: RenderSubjectRole;
  rendererId: string;
  /** Optional renderer version string, e.g. `1.30.0-next.8`. */
  rendererVersion?: string;
  runtimeId?: string;
  platform?: string;
  superdocVersion?: string;
  superdocCommit?: string;
}
60+
61+
/**
 * A render subject: a `RenderSubjectIdentity` plus its own id, the evidence
 * level it applies to, and the artifacts attached to it.
 *
 * All three added fields are required; `artifactRefs` is typed as an array and
 * this interface does not constrain its length.
 */
export interface RenderSubject extends RenderSubjectIdentity {
  subjectId: string;
  evidenceLevel: EvidenceLevel;
  artifactRefs: ArtifactRef[];
}
66+
67+
/**
 * Identity of a run.
 *
 * Only `runId` is required. The document link, owner, stage, start time and
 * parent run id are optional.
 */
export interface RunIdentity {
  runId: string;
  documentRunId?: string;
  sourceDocument?: DocumentIdentity;
  owner?: string;
  stage?: string;
  // NOTE(review): typed as a plain string; the expected timestamp format is not visible here.
  startedAt?: string;
  parentRunId?: string;
}
76+
77+
/**
 * Identity of a set of artifacts.
 *
 * Requires the set id, an artifact kind, the run it is associated with, and a
 * root artifact reference. `generatedAt` is optional.
 */
export interface ArtifactSetIdentity {
  artifactSetId: string;
  artifactKind: string;
  run: RunIdentity;
  rootRef: ArtifactRef;
  // NOTE(review): typed as a plain string; the expected timestamp format is not visible here.
  generatedAt?: string;
}
84+
85+
/**
 * Evidence about one qualified XML name.
 *
 * Only `qName` is required (for example `w:tbl`). The namespace URI, prefix,
 * local name, owner element, schema source, provenance and classification are
 * optional.
 */
export interface SchemaQNameEvidence {
  qName: string;
  namespaceUri?: string;
  prefix?: string;
  localName?: string;
  ownerElementQName?: string;
  schemaSource?: string;
  provenance?: string;
  classification?:
    | 'strict'
    | 'transitional'
    | 'microsoft-extension'
    | 'w3c'
    | 'opc'
    | 'unknown';
}
95+
96+
/**
 * Anchor linking an observation back to source content.
 *
 * Every field is optional at the type level, so an anchor may carry any subset
 * of the identifiers below.
 */
export interface SourceAnchor {
  sourceNodeId?: string;
  occurrenceId?: string;
  rawFactIds?: string[];
  schemaQNames?: SchemaQNameEvidence[];
  featureKey?: string;
  conceptKey?: string;
  sourceRef?: SourceRef;
  anchorConfidence?: SourceConfidence;

  // NOTE(review): the `pm` prefix presumably refers to the editor document model — confirm.
  pmNodeId?: string;
  /** When present, both ends of the range are required. */
  pmRange?: {
    from: number;
    to: number;
  };

  flowBlockId?: string;
  layoutFragmentId?: string;
  paintItemId?: string;
}
114+
115+
/**
 * Identity fields for an observation.
 *
 * The observation id, source document, evidence level and mechanism are
 * required. The source path, page numbers and JSON path are optional.
 */
export interface WeakObservationIdentity {
  observationId: string;
  sourceDocument: DocumentIdentity;
  evidenceLevel: EvidenceLevel;
  mechanism: ObservationMechanism;
  sourcePath?: string;
  pageNumbers?: number[];
  /** Optional JSON path string, e.g. `$.pages[0].blocks[3].width`. */
  jsonPath?: string;
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
export * from './identity.js';
2+
export * from './observations.js';
3+
export * from './schemas.js';
4+
export * from './stable-id.js';
5+
export * from './vocabulary.js';

0 commit comments

Comments
 (0)