@@ -13,21 +13,62 @@ type Sql = any;
1313
/**
 * Common OOXML prefix -> namespace map for parsing user qnames like "w:tbl".
 *
 * Prefixes here are the conventional bindings used in real .docx / .xlsx /
 * .pptx packages and across the spec. A document may rebind any of these
 * (the XML spec lets `xmlns:w="..."` point anywhere), so for non-standard
 * bindings callers should pass Clark form `{namespace}localName` or just
 * `localName` and accept the WML default.
 *
 * Note: the spec PDF and the shipped XSDs occasionally disagree about the
 * canonical URI for a namespace. Where they differ we bind the prefix to
 * the URI used by the XSD (which is what the schema graph keys on). The
 * spec-prose URI is still reachable via `ooxml_search` / `ooxml_section`.
 * Example: `ds:` (custom XML data storage). The XSD targets
 * `.../officeDocument/2006/customXml`; ECMA-376 Part 1 §15.2.6 names
 * `.../officeDocument/customXmlDataProps`. We bind `ds` to the XSD URI.
 *
 * The table is built on a null-prototype object so that indexing it with an
 * arbitrary user-supplied prefix (e.g. `constructor`, `toString`) yields
 * `undefined` instead of an inherited `Object.prototype` member.
 * `Object.keys` and plain property reads behave exactly as before.
 */
const COMMON_PREFIXES: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    // Core ML vocabularies
    w: "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    x: "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
    p: "http://schemas.openxmlformats.org/presentationml/2006/main",

    // DrawingML
    a: "http://schemas.openxmlformats.org/drawingml/2006/main",
    wp: "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
    pic: "http://schemas.openxmlformats.org/drawingml/2006/picture",
    c: "http://schemas.openxmlformats.org/drawingml/2006/chart",
    dgm: "http://schemas.openxmlformats.org/drawingml/2006/diagram",
    xdr: "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",

    // Shared / officeDocument family
    r: "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    s: "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes",
    m: "http://schemas.openxmlformats.org/officeDocument/2006/math",
    ds: "http://schemas.openxmlformats.org/officeDocument/2006/customXml",
    vt: "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes",

    // Package / OPC core properties
    cp: "http://schemas.openxmlformats.org/package/2006/metadata/core-properties",
    dc: "http://purl.org/dc/elements/1.1/",
    dcterms: "http://purl.org/dc/terms/",
    dcmitype: "http://purl.org/dc/dcmitype/",

    // Markup compatibility + Microsoft Word extensions
    mc: "http://schemas.openxmlformats.org/markup-compatibility/2006",
    w14: "http://schemas.microsoft.com/office/word/2010/wordml",
    w15: "http://schemas.microsoft.com/office/word/2012/wordml",
    w16: "http://schemas.microsoft.com/office/word/2018/wordml",

    // VML (legacy)
    v: "urn:schemas-microsoft-com:vml",
    o: "urn:schemas-microsoft-com:office:office",

    // W3C built-ins (`xsd` and `xs` are aliases for the same namespace)
    xsd: "http://www.w3.org/2001/XMLSchema",
    xs: "http://www.w3.org/2001/XMLSchema",
    xsi: "http://www.w3.org/2001/XMLSchema-instance",
    xml: "http://www.w3.org/XML/1998/namespace",
  },
);
3374
@@ -41,9 +82,17 @@ export interface ParsedQName {
4182
4283export type QNameParseResult = { ok : true ; qname : ParsedQName } | { ok : false ; reason : string } ;
4384
/**
 * Returns every prefix registered in COMMON_PREFIXES as a freshly allocated
 * array, sorted with the default string ordering. Intended for error
 * messages and tool descriptions (callers typically join it with ", ").
 *
 * The list is computed from COMMON_PREFIXES on each call, so it cannot
 * drift out of sync with the table.
 *
 * @returns Sorted prefix names, e.g. `["a", "c", "cp", ...]`.
 */
export function knownPrefixes(): string[] {
  const prefixes = Object.keys(COMMON_PREFIXES);
  prefixes.sort();
  return prefixes;
}
92+
4493/**
4594 * Parse a user-supplied qname. Accepts:
46- * - `prefix:localName` for known OOXML prefixes (w, r, s, m, a, wp, pic, c, dgm, xsd, xml)
95+ * - `prefix:localName` for any prefix in COMMON_PREFIXES
4796 * - `{namespace}localName` Clark form
4897 * - bare `localName` (assumes WML main namespace)
4998 */
@@ -70,7 +119,7 @@ export function parseQName(raw: string): QNameParseResult {
70119 if ( ! namespace ) {
71120 return {
72121 ok : false ,
73- reason : `unknown prefix '${ prefix } '. Use a known prefix (w, r, s, m, a, wp, pic, c, dgm), or Clark form {namespace}localName.` ,
122+ reason : `unknown prefix '${ prefix } '. Known prefixes: ${ knownPrefixes ( ) . join ( ", " ) } . Or pass Clark form {namespace}localName.` ,
74123 } ;
75124 }
76125 return { ok : true , qname : { namespace, localName, rawPrefix : prefix } } ;
@@ -613,3 +662,117 @@ export async function getNamespaceInfo(sql: Sql, uri: string): Promise<Namespace
613662 }
614663 return { uri, vocabularies : [ ...vocabSet ] . sort ( ) , profiles } ;
615664}
665+
/**
 * List ingested namespaces, optionally filtered by a case-insensitive
 * substring of the URI. Returns one entry per namespace with the same
 * shape as `getNamespaceInfo` so callers can format the list uniformly.
 *
 * Only namespaces that actually contain symbols are returned: the query
 * inner-joins `xsd_namespaces` against `xsd_symbol_profiles`, so a
 * namespace row with no profile membership never appears.
 *
 * @param sql - Tagged-template SQL client.
 * @param opts - `query`: optional text matched literally anywhere in the
 *   URI. It is trimmed first; an empty or whitespace-only query means
 *   "no filter". `%`, `_` and `\` in the query are escaped so they match
 *   themselves instead of acting as LIKE wildcards.
 * @returns One `NamespaceInfo` per URI, in the order URIs first appear in
 *   the result set (the query orders by URI, then profile name), each with
 *   per-profile symbol counts and a sorted, de-duplicated vocabulary list.
 */
export async function listNamespaces(
  sql: Sql,
  opts: { query?: string } = {},
): Promise<NamespaceInfo[]> {
  const q = opts.query?.trim();
  // Backslash is the default LIKE/ILIKE escape character in PostgreSQL, so
  // prefixing each metacharacter with it keeps the filter a plain substring
  // match, as documented.
  const pattern = q ? `%${q.replace(/[\\%_]/g, "\\$&")}%` : undefined;
  const rows =
    pattern !== undefined
      ? await sql`
          SELECT ns.uri, p.name AS profile_name, COUNT(*)::int AS symbol_count,
                 array_agg(DISTINCT s.vocabulary_id) AS vocabularies
          FROM xsd_namespaces ns
          JOIN xsd_symbol_profiles sp ON sp.namespace_id = ns.id
          JOIN xsd_profiles p ON p.id = sp.profile_id
          JOIN xsd_symbols s ON s.id = sp.symbol_id
          WHERE ns.uri ILIKE ${pattern}
          GROUP BY ns.uri, p.name
          ORDER BY ns.uri, p.name
        `
      : await sql`
          SELECT ns.uri, p.name AS profile_name, COUNT(*)::int AS symbol_count,
                 array_agg(DISTINCT s.vocabulary_id) AS vocabularies
          FROM xsd_namespaces ns
          JOIN xsd_symbol_profiles sp ON sp.namespace_id = ns.id
          JOIN xsd_profiles p ON p.id = sp.profile_id
          JOIN xsd_symbols s ON s.id = sp.symbol_id
          GROUP BY ns.uri, p.name
          ORDER BY ns.uri, p.name
        `;

  // Rows arrive as one per (namespace, profile); fold them into one entry
  // per namespace URI.
  const byUri = new Map<string, NamespaceInfo>();
  for (const r of rows) {
    const uri = r.uri as string;
    let info = byUri.get(uri);
    if (!info) {
      info = { uri, vocabularies: [], profiles: [] };
      byUri.set(uri, info);
    }
    info.profiles.push({
      name: r.profile_name as string,
      symbolCount: r.symbol_count as number,
    });
    // The same vocabulary can show up under several profiles of one
    // namespace; keep each id once. A missing aggregate is treated as empty.
    for (const v of (r.vocabularies as string[]) ?? []) {
      if (!info.vocabularies.includes(v)) info.vocabularies.push(v);
    }
  }
  for (const info of byUri.values()) info.vocabularies.sort();
  return [...byUri.values()];
}
722+
/**
 * One symbol match returned by `findLocalNameAcrossNamespaces`: a top-level
 * symbol identified by its local name, kind, vocabulary and namespace.
 */
export interface LocalNameHit {
  /** Local (unprefixed) name of the symbol, from `xsd_symbols.local_name`. */
  localName: string;
  /** Symbol kind, from `xsd_symbols.kind` (presumably element / type / etc. — confirm against the schema). */
  kind: string;
  /** Identifier of the vocabulary the symbol belongs to, from `xsd_symbols.vocabulary_id`. */
  vocabularyId: string;
  /** URI of the namespace the symbol is registered under, from `xsd_namespaces.uri`. */
  namespaceUri: string;
}
729+
/**
 * Find top-level symbols with this local name across all namespaces in a
 * profile. Used to power "did you mean?" suggestions when an exact lookup
 * misses: e.g. `t` exists as both `w:t` and `a:t`, and an agent asking for
 * `t` deserves to see both.
 *
 * Returns at most 10 hits. Excludes local elements (parent_symbol_id IS
 * NOT NULL) for the same reason `lookupSymbol` does: they have no global
 * qname-addressable identity.
 *
 * Results are ordered by vocabulary, then kind, then namespace URI. The
 * URI tie-breaker makes the ordering total over the selected columns, so
 * which rows survive the LIMIT is deterministic.
 *
 * @param sql - Tagged-template SQL client.
 * @param localName - Exact local name to match (no wildcards).
 * @param profile - Profile name; only symbols that are members of this
 *   profile are considered.
 * @param opts - `kind`: when set (non-empty), restrict hits to this symbol
 *   kind.
 * @returns Up to 10 distinct (localName, kind, vocabulary, namespace) hits.
 */
export async function findLocalNameAcrossNamespaces(
  sql: Sql,
  localName: string,
  profile: string,
  opts: { kind?: string } = {},
): Promise<LocalNameHit[]> {
  const rows = opts.kind
    ? await sql`
        SELECT DISTINCT s.local_name, s.kind, s.vocabulary_id, ns.uri AS namespace_uri
        FROM xsd_symbols s
        JOIN xsd_symbol_profiles sp ON sp.symbol_id = s.id
        JOIN xsd_namespaces ns ON ns.id = sp.namespace_id
        JOIN xsd_profiles p ON p.id = sp.profile_id
        WHERE s.local_name = ${localName}
          AND s.kind = ${opts.kind}
          AND s.parent_symbol_id IS NULL
          AND p.name = ${profile}
        ORDER BY s.vocabulary_id, s.kind, ns.uri
        LIMIT 10
      `
    : await sql`
        SELECT DISTINCT s.local_name, s.kind, s.vocabulary_id, ns.uri AS namespace_uri
        FROM xsd_symbols s
        JOIN xsd_symbol_profiles sp ON sp.symbol_id = s.id
        JOIN xsd_namespaces ns ON ns.id = sp.namespace_id
        JOIN xsd_profiles p ON p.id = sp.profile_id
        WHERE s.local_name = ${localName}
          AND s.parent_symbol_id IS NULL
          AND p.name = ${profile}
        ORDER BY s.vocabulary_id, s.kind, ns.uri
        LIMIT 10
      `;
  // Translate snake_case result columns into the camelCase hit shape.
  return rows.map((r: Record<string, unknown>) => ({
    localName: r.local_name as string,
    kind: r.kind as string,
    vocabularyId: r.vocabulary_id as string,
    namespaceUri: r.namespace_uri as string,
  }));
}
0 commit comments