Skip to content

Commit 435ee71

Browse files
committed
A few more tweaks based on feedback
See this comment: #1229 (review)
1 parent 418a102 commit 435ee71

1 file changed

Lines changed: 8 additions & 3 deletions

File tree

netlify/edge-functions/markdown-negotiation.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ const NOISE_SELECTORS = [
66
"style",
77
"noscript",
88
"template",
9-
"svg",
109
"header",
1110
"footer",
1211
"nav",
@@ -127,8 +126,7 @@ function htmlToMarkdownFallback(html: string, baseUrl: URL): string {
127126
.replace(/<!--[\s\S]*?-->/g, "")
128127
.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, "")
129128
.replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, "")
130-
.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, "")
131-
.replace(/<svg\b[^>]*>[\s\S]*?<\/svg>/gi, "");
129+
.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript>/gi, "");
132130

133131
const titleMatch = sanitized.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
134132
const title = titleMatch ? normalizeWhitespace(decodeHtmlEntities(stripTags(titleMatch[1]))) : "";
@@ -469,11 +467,13 @@ function selectContentRoot(doc: { querySelector: (selector: string) => any; body
469467
const preferredSelectors = [
470468
"#main article",
471469
"main article",
470+
".main article",
472471
"article.guide",
473472
"article",
474473
"#main",
475474
"[role='main']",
476475
"main",
476+
".main",
477477
"body",
478478
];
479479

@@ -496,6 +496,11 @@ function extractPrimaryHtmlFragment(html: string): string {
496496
return mainHtml;
497497
}
498498

499+
const guideArticleMatch = html.match(/<article\b[^>]*class=(["'])[^"']*\bguide\b[^"']*\1[^>]*>([\s\S]*?)<\/article>/i);
500+
if (guideArticleMatch) {
501+
return guideArticleMatch[2];
502+
}
503+
499504
const articleMatch = html.match(/<article\b[^>]*>([\s\S]*?)<\/article>/i);
500505
if (articleMatch) {
501506
return articleMatch[1];

0 commit comments

Comments
 (0)