Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/fetch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
]
dependencies = [
"charset-normalizer>=3.0.0",
"httpx>=0.27",
"markdownify>=0.13.1",
"mcp>=1.1.3",
Expand Down
28 changes: 19 additions & 9 deletions src/fetch/src/mcp_server_fetch/server.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Annotated, Tuple
from urllib.parse import urlparse, urlunparse

import httpx
import markdownify
import readabilipy.simple_json
from charset_normalizer import from_bytes
from mcp.shared.exceptions import McpError
from mcp.server import Server
from mcp.server.stdio import stdio_server
Expand All @@ -24,6 +26,18 @@
DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"


def get_response_text(response: httpx.Response) -> str:
    """Decode an httpx response body to text.

    When the HTTP Content-Type header declares a charset, httpx's own
    decoding is trusted as-is.  When no charset is declared, the raw bytes
    are run through charset_normalizer; its best candidate wins, and
    httpx's default decoding remains the last-resort fallback.
    """
    # A declared charset means the server told us how to decode — honor it.
    if response.charset_encoding is None:
        best_guess = from_bytes(response.content).best()
        if best_guess is not None:
            return str(best_guess)
    # Charset was declared, or detection produced no candidate: fall back
    # to httpx's standard decoding behavior.
    return response.text


def extract_content_from_html(html: str) -> str:
"""Extract and convert HTML content to Markdown format.

Expand Down Expand Up @@ -68,18 +82,16 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url:
Check if the URL can be fetched by the user agent according to the robots.txt file.
Raises a McpError if not.
"""
from httpx import AsyncClient, HTTPError

robot_txt_url = get_robots_txt_url(url)

async with AsyncClient(proxy=proxy_url) as client:
async with httpx.AsyncClient(proxy=proxy_url) as client:
try:
response = await client.get(
robot_txt_url,
follow_redirects=True,
headers={"User-Agent": user_agent},
)
except HTTPError:
except httpx.HTTPError:
raise McpError(ErrorData(
code=INTERNAL_ERROR,
message=f"Failed to fetch robots.txt {robot_txt_url} due to a connection issue",
Expand Down Expand Up @@ -114,25 +126,23 @@ async def fetch_url(
"""
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
"""
from httpx import AsyncClient, HTTPError

async with AsyncClient(proxy=proxy_url) as client:
async with httpx.AsyncClient(proxy=proxy_url) as client:
try:
response = await client.get(
url,
follow_redirects=True,
headers={"User-Agent": user_agent},
timeout=30,
)
except HTTPError as e:
except httpx.HTTPError as e:
raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Failed to fetch {url}: {e!r}"))
if response.status_code >= 400:
raise McpError(ErrorData(
code=INTERNAL_ERROR,
message=f"Failed to fetch {url} - status code {response.status_code}",
))

page_raw = response.text
page_raw = get_response_text(response)

content_type = response.headers.get("content-type", "")
is_page_html = (
Expand Down
52 changes: 52 additions & 0 deletions src/fetch/tests/test_server.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""Tests for the fetch MCP server."""

import httpx
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from mcp.shared.exceptions import McpError

from mcp_server_fetch.server import (
extract_content_from_html,
get_response_text,
get_robots_txt_url,
check_may_autonomously_fetch_url,
fetch_url,
Expand Down Expand Up @@ -324,3 +326,53 @@ async def test_fetch_with_proxy(self):

# Verify AsyncClient was called with proxy
mock_client_class.assert_called_once_with(proxy="http://proxy.example.com:8080")


def _build_response(raw_body: bytes, content_type: str = "text/html") -> httpx.Response:
    """Construct a real httpx.Response whose Content-Type header carries no
    charset parameter, so decoding must fall back to byte-level detection."""
    response_headers = {"content-type": content_type}
    return httpx.Response(
        200,
        headers=response_headers,
        content=raw_body,
    )


class TestGetResponseText:
    """Tests for get_response_text with various non-UTF-8 encodings.

    Each non-UTF-8 case builds a response whose Content-Type header omits
    the charset parameter, forcing get_response_text to rely on
    charset_normalizer's byte-level detection instead of the HTTP header.
    """

    def test_utf8_passthrough(self):
        """UTF-8 pages with charset declared in HTTP header use the standard path."""
        text = "Hello World"
        resp = httpx.Response(
            200,
            headers={"content-type": "text/html; charset=utf-8"},
            content=text.encode("utf-8"),
        )
        assert get_response_text(resp) == text

    def test_ukrainian_windows_1251(self):
        text = "Київ це найбільше місто України із населенням понад три мільйони людей та є столицею нашої держави і культурним центром країни."
        body = (
            b"<html><body><p>" + text.encode("windows-1251") + b"</p></body></html>"
        )
        assert text in get_response_text(_build_response(body))

    def test_hebrew_windows_1255(self):
        text = "ירושלים היא הבירה של ישראל ועיר קדושה לשלוש הדתות המונותאיסטיות הגדולות העיר שוכנת בהרי יהודה ומהווה מרכז דתי ותרבותי חשוב."
        body = (
            b"<html><body><p>" + text.encode("windows-1255") + b"</p></body></html>"
        )
        assert text in get_response_text(_build_response(body))

    def test_arabic_windows_1256(self):
        text = "القاهرة هي عاصمة جمهورية مصر العربية وأكبر مدنها تقع على ضفاف نهر النيل وتعتبر من أكبر المدن في الشرق الأوسط وأفريقيا."
        body = (
            b"<html><body><p>" + text.encode("windows-1256") + b"</p></body></html>"
        )
        assert text in get_response_text(_build_response(body))

    def test_korean_euc_kr(self):
        # Bug fix: the body was previously encoded as UTF-8 (the httpx
        # default), so this test never exercised EUC-KR detection at all.
        # Encode as EUC-KR and omit the charset header, matching the other
        # non-UTF-8 cases above.
        text = "서울특별시는 대한민국의 수도이자 최대 도시이다"
        body = (
            b"<html><body><p>" + text.encode("euc-kr") + b"</p></body></html>"
        )
        assert text in get_response_text(_build_response(body))
4 changes: 3 additions & 1 deletion src/fetch/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading