@@ -304,6 +304,7 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
304304 const kNewlineModeTok = kOf ( newline ?. token ?? null ) ;
305305 const kIndentTok = kOf ( indent ?. indentToken ?? null ) , kDedentTok = kOf ( indent ?. dedentToken ?? null ) , kIndentNewlineTok = kOf ( indent ?. newlineToken ?? null ) ;
306306 const kBlockScalarTok = kOf ( indent ?. blockScalar ?. token ?? null ) ;
307+ const kRawBlockTok = kOf ( indent ?. rawBlock ?. token ?? null ) ;
307308 const kPlainCont = kOf ( plainContinuationTokenName ) ;
308309 const tColon = puLitOf . get ( ':' ) ?? 0 ;
309310
@@ -343,6 +344,14 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
343344 // denoted, so the "newline-or-EOF" alternation is unchanged.
344345 const blockScalarSig = / [ | > ] (?: [ 1 - 9 ] [ + - ] ? | [ + - ] [ 1 - 9 ] ? | [ + - ] | ) [ \t ] * (?: (?< = [ \t ] ) # [ ^ \n ] * ) ? (?: \r ? \n | $ ) / y;
345346 if ( indent ?. blockScalar ) indentTokenNames . add ( indent . blockScalar . token ) ;
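// Raw content blocks: a line-TRAILING introducer (e.g. Pug-style `tag:mode` at end of a line)
// whose SIGNATURE must match from the introducer char through end-of-line. Sticky, like
// blockScalarSig. `introChar` is the first char of the signature's match (a cheap pre-filter
// checked before the regex runs). It defaults to ':' — a custom `signature` that begins with
// any other character must set `introChar` as well, or the pre-filter rejects every position.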
350+ const rawBlockSig = indent ?. rawBlock
351+ ? new RegExp ( indent . rawBlock . signature ?? ':(?:[A-Za-z][A-Za-z0-9-]*)?[ \\t]*(?:\\r?\\n|$)' , 'y' )
352+ : null ;
353+ const rawBlockChar = indent ?. rawBlock ?. introChar ?? ':' ;
354+ if ( indent ?. rawBlock ) indentTokenNames . add ( indent . rawBlock . token ) ;
346355 // Col-0 strings (`---`/`...`) that always end a block scalar — a document boundary outranks
347356 // indentation — and, when one heads the introducer's line, mark a document-ROOT scalar.
348357 const blockScalarDocMarkers = indent ?. blockScalar ?. documentMarkers ?? [ ] ;
@@ -684,7 +693,10 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
684693 throw new Error ( `Tab character used in indentation at offset ${ p } ` ) ;
685694 }
686695 }
687- if ( lineComment && source . startsWith ( lineComment , p ) ) { // comment-only line — ignored
696+ if ( lineComment && source . startsWith ( lineComment , p )
697+ // commentExcept: a comment introducer immediately followed by this string is NOT a
698+ // comment line (e.g. `//` strip-comments vs `//!` doc-comments) — fall through to tokens.
699+ && ! ( indent ?. commentExcept && source . startsWith ( indent . commentExcept , p + lineComment . length ) ) ) {
688700 let e = p ; while ( e < source . length && source [ e ] !== '\n' ) e ++ ;
689701 pos = e ; pendingComment = true ; continue ; // next iteration consumes the newline
690702 }
@@ -904,6 +916,58 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
904916 continue ;
905917 }
906918
// ── Raw content block ──
// A line-trailing introducer (indent.rawBlock.signature, sticky-matched from `pos` through
// end of line) swallows the lines that follow it into ONE verbatim token. It is only honoured
// outside flow context, and only when it is either GLUED to the line's preceding content
// (`script:`, `article:md`) or is the first thing on the line (`:md`). This mirrors the YAML
// block scalar, except that the introducer sits at the END of its line.
if (rawBlockSig && flowDepth === 0 && indent?.rawBlock && source[pos] === rawBlockChar
    && ((rawBlockSig.lastIndex = pos), rawBlockSig.test(source))) {
  // Everything on the introducer's line that precedes the introducer itself.
  const lineOrigin = pos === 0 ? 0 : source.lastIndexOf('\n', pos - 1) + 1;
  const before = source.slice(lineOrigin, pos);

  // Line lead: nothing but spaces/tabs (or nothing at all) before the introducer.
  const atLead = /^[ \t]*$/.test(before);

  // GLUED: some non-whitespace precedes the introducer and, after the leading indentation,
  // there is no space/tab outside parentheses or quotes (`div(a="1" b):md` is glued;
  // `label Size:` is not, so its trailing colon never opens a raw block).
  let glued = false;
  if (!atLead && /\S/.test(before)) {
    glued = true;
    let nesting = 0;
    let openQuote = '';
    let i = 0;
    while (before[i] === ' ' || before[i] === '\t') i++; // leading indentation is allowed
    for (; glued && i < before.length; i++) {
      const ch = before[i];
      if (openQuote !== '') {
        // Inside a quoted run: only the matching quote character ends it.
        if (ch === openQuote) openQuote = '';
        continue;
      }
      switch (ch) {
        case '"':
        case "'":
        case '`':
          openQuote = ch;
          break;
        case '(':
          nesting++;
          break;
        case ')':
          if (nesting > 0) nesting--; // never drops below zero on an unbalanced `)`
          break;
        case ' ':
        case '\t':
          if (nesting === 0) glued = false; // top-level whitespace → not glued
          break;
      }
    }
  }

  if (glued || atLead) {
    const blockStart = pos;
    const baseIndent = indentStack[indentStack.length - 1];
    const total = source.length;

    // Step past the header line (the introducer through its newline, if there is one).
    const headerNl = source.indexOf('\n', pos);
    let cursor = headerNl === -1 ? total : headerNl + 1;

    // Consume following lines: blank lines always belong to the block; any other line
    // belongs only while its space-indentation exceeds the top of the indent stack.
    while (cursor < total) {
      let textAt = cursor;
      while (textAt < total && source[textAt] === ' ') textAt++; // only spaces count as indentation
      const width = textAt - cursor;
      if (textAt >= total) { cursor = textAt; break; } // trailing spaces up to end of input

      const first = source[textAt];
      if (first === '\n') { cursor = textAt + 1; continue; }
      if (first === '\r') {
        cursor = source[textAt + 1] === '\n' ? textAt + 2 : textAt + 1;
        continue;
      }

      if (!(width > baseIndent)) break; // not deeper than the base level → the block ends here

      const lineNl = source.indexOf('\n', textAt);
      cursor = lineNl === -1 ? total : lineNl + 1;
    }

    // The token text runs from the introducer through the end of the captured lines.
    push(mkNamed(indent.rawBlock.token, source.slice(blockStart, cursor), blockStart, kRawBlockTok));
    pos = cursor;
    lineStart = true;
    continue;
  }
}
970+
907971 // Close an interpolation hole (interpClose at baseline depth) → resume the template span.
908972 if ( templateStack . length > 0 && source . startsWith ( tplInterpClose , pos ) ) {
909973 const depth = templateStack [ templateStack . length - 1 ] ;
@@ -982,7 +1046,10 @@ export function createLexer(grammar: CstGrammar, intern?: LexerIntern) {
9821046 // the separator — emit it as the `:` punctuation literal here. Gated on flow (block-context `:`
9831047 // separators are handled by the KEY-position lookaheads). yaml-test-suite 5MUD / 5T43 / 9MMW
9841048 // / C2DT / K3WX (quoted key) and the flow-collection-key cohort.
985- if ( indent && flowDepth > 0 && source [ pos ] === ':' ) {
1049+ // flowColonSeparator: false disables the YAML `"key":value` / ` }: value` flow
1050+ // separator carve-out, for indentation grammars with `:name`-shaped tokens that
1051+ // may legally follow a quoted value or a flow-close delimiter.
1052+ if ( indent && indent . flowColonSeparator !== false && flowDepth > 0 && source [ pos ] === ':' ) {
9861053 const prevTok = tokens [ tokens . length - 1 ] ;
9871054 if ( prevTok && ( stringTokenNames . has ( prevTok . type ) || ( prevTok . type === '' && flowCloseSet . has ( prevTok . text ) ) ) ) {
9881055 push ( mkPu ( ':' , pos , tColon ) ) ;
0 commit comments