@@ -33,19 +33,17 @@ class DotProcessor extends BaseProcessor {
3333
3434 // Extract all edge statements using regex to handle single-line DOT files
3535 const edgeRegex = / " ? ( [ ^ " \s ] + ) " ? \s * - > \s * " ? ( [ ^ " \s ] + ) " ? (?: \s * \[ l a b e l = " ( [ ^ " ] + ) " \] ) ? / g;
36- const nodeRegex = / " ? ( [ ^ " \s ] + ) " ? \s * \[ l a b e l = " ( [ ^ " ] + ) " \] / g;
3736
38- // Find all explicit node definitions
39- let nodeMatch ;
40- while ( ( nodeMatch = nodeRegex . exec ( content ) ) !== null ) {
41- const [ , id , label ] = nodeMatch ;
42- nodes . set ( id , { id, label } ) ;
43- }
37+ // We need to find nodes, but avoid matching the target of an edge which might look like a node definition
38+ // e.g. A -> B [label="L"] -- "B [label="L"]" looks like a node def
39+ // Strategy: Find all edges, record them, and then "mask" them in the content to avoid false positives for nodes
4440
45- // Find all edge definitions
41+ let maskedContent = content ;
4642 let edgeMatch ;
43+
44+ // Find all edge definitions
4745 while ( ( edgeMatch = edgeRegex . exec ( content ) ) !== null ) {
48- const [ , from , to , label ] = edgeMatch ;
46+ const [ fullMatch , from , to , label ] = edgeMatch ;
4947 edges . push ( { from, to, label } ) ;
5048
5149 // Add nodes if they don't exist (implicit definition)
@@ -55,6 +53,26 @@ class DotProcessor extends BaseProcessor {
5553 if ( ! nodes . has ( to ) ) {
5654 nodes . set ( to , { id : to , label : to } ) ;
5755 }
56+
57+ // Mask this edge in the content so the node regex below cannot match it as a node definition.
58+ // The first occurrence of the matched text is overwritten with an equal number of spaces, so maskedContent keeps the same length as content.
59+ maskedContent = maskedContent . replace ( fullMatch , ' ' . repeat ( fullMatch . length ) ) ;
60+ }
61+
62+ // Now find explicit node definitions in the masked content
63+ // This regex matches: ID [label="LABEL"]
64+ // The label body is matched with (?:[^"\\]|\\.)* so that an escaped quote (\") or
65+ // an escaped backslash (\\) inside the label does not end the match early.
66+ // The previous pattern `[^"]+` stopped at the first quote, whether escaped or not.
67+ const nodeRegex = / " ? ( [ ^ " \s ] + ) " ? \s * \[ l a b e l = " ( (?: [ ^ " \\ ] | \\ .) * ) " \] / g;
68+
69+ let nodeMatch ;
70+ while ( ( nodeMatch = nodeRegex . exec ( maskedContent ) ) !== null ) {
71+ const [ , id , rawLabel ] = nodeMatch ;
72+ // Unescape the label: replace \" with " and \\ with \
73+ const label = rawLabel . replace ( / \\ " / g, '"' ) . replace ( / \\ \\ / g, '\\' ) ;
74+ // Always store the explicit definition: it overrides any implicit node (label = id) created while scanning edges
75+ nodes . set ( id , { id, label } ) ;
5876 }
5977
6078 return { nodes : Array . from ( nodes . values ( ) ) , edges } ;
@@ -111,7 +129,8 @@ class DotProcessor extends BaseProcessor {
111129 let hasControl = false ;
112130 for ( let i = 0 ; i < head . length ; i ++ ) {
113131 const code = head . charCodeAt ( i ) ;
114- if ( code === 0 || ( code >= 0 && code <= 8 ) || ( code >= 14 && code <= 31 ) || code >= 127 ) {
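132+ // Code units >= 127 are no longer treated as control characters, so non-ASCII text is accepted (note: charCodeAt returns UTF-16 code units, and this also lets DEL, 127, through)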
133+ if ( code === 0 || ( code >= 0 && code <= 8 ) || ( code >= 14 && code <= 31 ) ) {
115134 hasControl = true ;
116135 break ;
117136 }
@@ -203,10 +222,15 @@ class DotProcessor extends BaseProcessor {
203222 saveFromTree ( tree : AACTree , _outputPath : string ) : void {
204223 let dotContent = 'digraph AACBoard {\n' ;
205224
/**
 * Escapes a string so it can be embedded between double quotes in DOT output.
 *
 * Every backslash and every double quote is prefixed with a backslash, so
 * `"` becomes `\"` and `\` becomes `\\`. All other characters are returned
 * unchanged.
 *
 * @param str - Raw text to place inside a quoted DOT string.
 * @returns The text with backslashes and double quotes escaped.
 */
const escapeDotString = (str: string): string => {
  // One pass over both characters: `$&` re-inserts the matched character
  // after the added backslash, so the result does not depend on the order
  // in which the two escapes are applied.
  return str.replace(/["\\]/g, '\\$&');
};
229+
206230 // Add nodes
207231 for ( const pageId in tree . pages ) {
208232 const page = tree . pages [ pageId ] ;
209- dotContent += ` "${ page . id } " [label="${ page . name } "]\n` ;
233+ dotContent += ` "${ page . id } " [label="${ escapeDotString ( page . name ) } "]\n` ;
210234 }
211235
212236 // Add edges from navigation buttons (semantic intent or legacy targetPageId)
@@ -222,7 +246,7 @@ class DotProcessor extends BaseProcessor {
222246 . forEach ( ( btn : AACButton ) => {
223247 const target = btn . semanticAction ?. targetId || btn . targetPageId ;
224248 if ( target ) {
225- dotContent += ` "${ page . id } " -> "${ target } " [label="${ btn . label } "]\n` ;
249+ dotContent += ` "${ page . id } " -> "${ target } " [label="${ escapeDotString ( btn . label ) } "]\n` ;
226250 }
227251 } ) ;
228252 }
0 commit comments