File tree Expand file tree Collapse file tree
src/fetch/src/mcp_server_fetch Expand file tree Collapse file tree Original file line number Diff line number Diff line change 33
44import markdownify
55import readabilipy .simple_json
6+ from bs4 import BeautifulSoup
67from mcp .shared .exceptions import McpError
78from mcp .server import Server
89from mcp .server .stdio import stdio_server
@@ -71,9 +72,14 @@ def extract_content_from_html(html: str) -> str:
7172 if len (content .strip ()) >= min_expected_length :
7273 return content
7374
74- # Stage 3: Convert full HTML directly with markdownify (last resort)
75+ # Stage 3: Convert full HTML directly with markdownify (last resort).
76+ # Strip <script> and <style> first — markdownify renders them verbatim as
77+ # plain text, which injects large blobs of JS/CSS noise into the output.
78+ soup = BeautifulSoup (html , "html.parser" )
79+ for tag in soup (["script" , "style" ]):
80+ tag .decompose ()
7581 content = markdownify .markdownify (
76- html ,
82+ str ( soup ) ,
7783 heading_style = markdownify .ATX ,
7884 )
7985 if content .strip ():
You can’t perform that action at this time.
0 commit comments