Skip to content

Commit d80d0ec

Browse files
committed
Enhance HTMLExtractionOptions with new annotateNumberClasses feature
- Added `annotateNumberClasses` option to the HTMLExtractionOptions interface for appending CSS class names to numeric elements in markdown output.
- Updated README to include documentation for the new feature.
- Improved HTML to Markdown conversion logic to support number class annotation, preserving semantic meaning.
- Added unit tests to ensure correct functionality across various scenarios.
1 parent 79ae1da commit d80d0ec

1 file changed

Lines changed: 16 additions & 8 deletions

File tree

src/dev/testHtmlToMarkdown.ts

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ import { HTMLExtractionOptions } from "../types";
77
async function testConvertHtmlToMarkdown(
88
htmlFilePath: string,
99
outputDir: string,
10-
options?: HTMLExtractionOptions
10+
options?: HTMLExtractionOptions,
1111
) {
1212
try {
1313
// Read the HTML file
@@ -23,11 +23,13 @@ async function testConvertHtmlToMarkdown(
2323

2424
// Generate output filename
2525
const baseName = path.basename(htmlFilePath, ".html");
26-
const optionsSuffix = options?.includeImages
27-
? ".with-images"
28-
: options?.extractMainHtml
29-
? ".main-content"
30-
: "";
26+
const optionsSuffix = options?.annotateNumberClasses
27+
? ".number-classes"
28+
: options?.includeImages
29+
? ".with-images"
30+
: options?.extractMainHtml
31+
? ".main-content"
32+
: "";
3133
const outputPath = path.join(outputDir, `${baseName}${optionsSuffix}.md`);
3234

3335
// Save the markdown
@@ -56,7 +58,7 @@ async function main() {
5658

5759
// Test different conversion options
5860
console.log(
59-
"\n🔍 Testing HTML to Markdown conversion with different options...\n"
61+
"\n🔍 Testing HTML to Markdown conversion with different options...\n",
6062
);
6163

6264
// 1. Basic conversion
@@ -78,9 +80,15 @@ async function main() {
7880
extractMainHtml: true,
7981
});
8082

83+
// 5. Main content extraction with number annotation
84+
await testConvertHtmlToMarkdown(htmlFilePath, outputDir, {
85+
extractMainHtml: true,
86+
annotateNumberClasses: true,
87+
});
88+
8189
console.log(
8290
"\n✨ All conversions completed! Check the output in:",
83-
outputDir
91+
outputDir,
8492
);
8593
}
8694

0 commit comments

Comments
 (0)