@@ -128,6 +128,9 @@ function extractMainContent(doc: Document): string {
128128 return "" ;
129129 }
130130
131+ // Preprocess annotated code blocks before converting to markdown
132+ preprocessAnnotatedCodeBlocks ( clone , main as Element ) ;
133+
131134 // Return a minimal HTML document with just the content
132135 return `<!DOCTYPE html>
133136<html>
@@ -138,6 +141,65 @@ ${main.innerHTML}
138141</html>` ;
139142}
140143
/**
 * Preprocess annotated code blocks for llms output.
 *
 * Mutates `container` in place, in three passes:
 *  1. Restores the original code text (with annotation markers) from the
 *     `data-llms-code-original` attribute and drops that attribute.
 *  2. Removes the annotation gutter elements.
 *  3. Converts each annotation definition list to an ordered list.
 *
 * @param doc Document used to create the replacement `<ol>` / `<li>` elements.
 * @param container Element whose descendants are rewritten.
 */
function preprocessAnnotatedCodeBlocks(doc: Document, container: Element): void {
  // Restore original code text in annotated code blocks.
  // The llms-code-annotations.lua filter saves the original text
  // (before code-annotation.lua strips markers) as a data attribute.
  const annotated = container.querySelectorAll("[data-llms-code-original]");
  for (const node of annotated) {
    const el = node as Element;
    const originalText = el.getAttribute("data-llms-code-original");

    // The attribute is bookkeeping only; drop it unconditionally so it never
    // reaches the emitted HTML, even when there is nothing to restore.
    el.removeAttribute("data-llms-code-original");
    if (!originalText) continue;

    // The attribute is on the wrapper div; find the <code> element inside.
    const codeEl = el.tagName === "CODE"
      ? el
      : el.querySelector("code") as Element | null;
    if (codeEl) {
      // Replace content with original (removes syntax highlighting spans
      // and annotation buttons).
      codeEl.textContent = originalText;
    }
  }

  // Remove annotation gutter elements.
  const gutters = container.querySelectorAll(
    ".code-annotation-gutter, .code-annotation-gutter-bg",
  );
  for (const gutter of gutters) {
    (gutter as Element).remove();
  }

  // Convert annotation definition lists to ordered lists.
  // The annotation text is in <dd> elements; <dt> elements have just the number.
  const dls = container.querySelectorAll("dl.code-annotation-container-grid");
  for (const dlNode of dls) {
    const dl = dlNode as Element;
    const ol = doc.createElement("ol");

    for (const ddNode of dl.querySelectorAll("dd")) {
      const dd = ddNode as Element;

      // querySelectorAll also matches <dd> elements of definition lists nested
      // inside an annotation's text. Those are already carried over by the
      // enclosing <dd>'s innerHTML, so only accept a <dd> whose nearest <dl>
      // ancestor is the list being converted.
      let owner = dd.parentElement;
      while (owner && owner.tagName !== "DL") {
        owner = owner.parentElement;
      }
      if (owner !== dl) continue;

      const li = doc.createElement("li");
      li.innerHTML = dd.innerHTML;
      ol.appendChild(li);
    }

    // Replace the DL, or its cell-annotation wrapper div when it has one.
    const parent = dl.parentElement;
    const target = parent && parent.classList.contains("cell-annotation")
      ? parent
      : dl;
    target.parentElement?.replaceChild(ol, target);
  }
}
202+
141203/**
142204 * Convert HTML content to markdown using Pandoc with the llms.lua filter.
143205 */
0 commit comments