Skip to content

Commit 271119a

Browse files
Lev Gelfenbuimclaude
andcommitted
Fix SSRF vulnerability in URL fetching (CWE-918)
Validate URLs before fetching: block private/internal IPs, non-HTTP schemes, and DNS rebinding. Opt-in local access via --allow-local flag. Replace external URL test dependency with local HTTP server. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3c9a8b2 commit 271119a

3 files changed

Lines changed: 258 additions & 6 deletions

File tree

README.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ An MCP (Model Context Protocol) server that converts HTML content to Markdown fo
2121
- [Local Development](#local-development)
2222
- [Testing](#testing)
2323
- [Publishing a New Version](#publishing-a-new-version)
24+
- [Security](#security)
25+
- [SSRF Protection](#ssrf-protection)
26+
- [Allowing Local Network Access](#allowing-local-network-access)
2427
- [Technical Details](#technical-details)
2528
- [Related Projects](#related-projects)
2629
- [License](#license)
@@ -33,6 +36,7 @@ An MCP (Model Context Protocol) server that converts HTML content to Markdown fo
3336
- 🗑️ Automatically removes unwanted elements (scripts, styles, etc.)
3437
- 📊 Auto-extracts page titles and metadata
3538
- ⚡ Fast conversion using Turndown.js
39+
- 🔒 **SSRF protection** - Blocks requests to private/internal networks by default
3640

3741
## Installation
3842

@@ -351,6 +355,7 @@ The test suite includes:
351355
- URL fetching tests
352356
- File saving tests
353357
- Truncation and large page handling tests
358+
- SSRF protection tests
354359
- Integration workflow tests
355360

356361
### Publishing a New Version
@@ -381,6 +386,47 @@ npm run release:minor --otp=<code>
381386
npm run release:major --otp=<code>
382387
```
383388

389+
## Security
390+
391+
### SSRF Protection
392+
393+
By default, the server blocks URL requests to private and internal network addresses to prevent [Server-Side Request Forgery (SSRF)](https://owasp.org/www-community/attacks/Server-Side_Request_Forgery) attacks. This includes:
394+
395+
- Loopback addresses (`127.0.0.0/8`, `::1`)
396+
- Private networks (`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`)
397+
- Link-local / cloud metadata endpoints (`169.254.0.0/16`)
398+
- Non-HTTP(S) schemes (`file://`, `ftp://`, etc.)
399+
400+
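Hostnames are resolved via DNS before the request is made, and the request is rejected if the hostname resolves to a blocked address. This prevents bypasses that hide a private IP behind a public-looking hostname.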
401+
402+
### Allowing Local Network Access
403+
404+
If you need to convert HTML from local or internal servers (e.g., a local dev server), you can opt in by passing the `--allow-local` flag or by setting the `ALLOW_LOCAL_NETWORK` environment variable to exactly `true`:
405+
406+
```bash
407+
# Via CLI flag
408+
npx html-to-markdown-mcp --allow-local
409+
```
410+
411+
```bash
412+
# Via environment variable
413+
ALLOW_LOCAL_NETWORK=true npx html-to-markdown-mcp
414+
```
415+
416+
**Claude Desktop / Cursor configuration with local access:**
417+
```json
418+
{
419+
"mcpServers": {
420+
"html-to-markdown": {
421+
"command": "npx",
422+
"args": ["html-to-markdown-mcp", "--allow-local"]
423+
}
424+
}
425+
}
426+
```
427+
428+
> **Warning:** Only enable local network access if you trust the AI agent's URL inputs. With this flag enabled, the server can reach internal services, localhost ports, and cloud metadata endpoints.
429+
384430
## Technical Details
385431

386432
- **Protocol:** Model Context Protocol (MCP)

index.js

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,84 @@ import TurndownService from "turndown";
1010
import { writeFile, readFile } from "fs/promises";
1111
import { resolve, dirname } from "path";
1212
import { fileURLToPath } from "url";
13+
import { lookup } from "dns/promises";
14+
15+
// SSRF protection: validate URLs before fetching (CWE-918)
16+
// Address ranges that must never be fetched, written in CIDR notation and
// compiled once at module load into { bytes, prefixLength } records.
const BLOCKED_IP_RANGES = [
  // "This network" / unspecified
  "0.0.0.0/8",
  // RFC 1918 private ranges
  "10.0.0.0/8",
  "172.16.0.0/12", // 172.16.0.0 - 172.31.255.255
  "192.168.0.0/16",
  // RFC 6598 shared address space (carrier-grade NAT, not publicly routable)
  "100.64.0.0/10",
  // Loopback
  "127.0.0.0/8",
  // Link-local (cloud metadata endpoints like 169.254.169.254)
  "169.254.0.0/16",
  // IPv6 unspecified and loopback
  "::/128",
  "::1/128",
  // IPv6 unique local and link-local
  "fc00::/7",
  "fe80::/10",
].map(parseCidr);

/**
 * Parse a strict dotted-quad IPv4 address.
 *
 * @param {string} text - Candidate address, e.g. "192.168.1.1".
 * @returns {number[] | null} The four octets, or null if `text` is not a dotted quad.
 */
function parseIPv4(text) {
  const match = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(text);
  if (!match) return null;
  const octets = match.slice(1).map(Number);
  return octets.every((octet) => octet <= 255) ? octets : null;
}

/**
 * Parse an IPv6 address (with optional "::" compression, embedded IPv4 tail
 * and zone id) into its 16 bytes.
 *
 * @param {string} text - Candidate address without surrounding brackets.
 * @returns {number[] | null} The 16 bytes, or null if `text` is not IPv6.
 */
function parseIPv6(text) {
  // Drop a zone id such as "%eth0"; it does not affect the range check.
  const zoneStart = text.indexOf("%");
  let source = zoneStart === -1 ? text : text.slice(0, zoneStart);
  if (!source.includes(":")) return null;

  // Rewrite a dotted IPv4 tail ("::ffff:127.0.0.1") as two hex groups.
  const lastColon = source.lastIndexOf(":");
  const tail = source.slice(lastColon + 1);
  if (tail.includes(".")) {
    const octets = parseIPv4(tail);
    if (!octets) return null;
    const high = ((octets[0] << 8) | octets[1]).toString(16);
    const low = ((octets[2] << 8) | octets[3]).toString(16);
    source = `${source.slice(0, lastColon + 1)}${high}:${low}`;
  }

  const halves = source.split("::");
  if (halves.length > 2) return null;
  const leading = halves[0] ? halves[0].split(":") : [];
  const trailing = halves.length === 2 && halves[1] ? halves[1].split(":") : [];

  let groups;
  if (halves.length === 1) {
    groups = leading;
  } else {
    // "::" must stand for at least one all-zero group.
    const missing = 8 - leading.length - trailing.length;
    if (missing < 1) return null;
    groups = [...leading, ...new Array(missing).fill("0"), ...trailing];
  }
  if (groups.length !== 8) return null;
  if (!groups.every((group) => /^[0-9a-f]{1,4}$/i.test(group))) return null;

  return groups.flatMap((group) => {
    const value = Number.parseInt(group, 16);
    return [value >> 8, value & 0xff];
  });
}

/**
 * Compile a CIDR string into the record shape used by BLOCKED_IP_RANGES.
 *
 * @param {string} cidr - e.g. "10.0.0.0/8" or "fc00::/7".
 * @returns {{ bytes: number[], prefixLength: number }}
 */
function parseCidr(cidr) {
  const [address, prefixLength] = cidr.split("/");
  return {
    bytes: parseIPv4(address) ?? parseIPv6(address),
    prefixLength: Number(prefixLength),
  };
}

/**
 * Check whether the first `prefixLength` bits of `bytes` equal those of `network`.
 *
 * @param {number[]} bytes - Address bytes being tested.
 * @param {number[]} network - Network base address bytes (same length).
 * @param {number} prefixLength - Number of leading bits to compare.
 * @returns {boolean}
 */
function hasPrefix(bytes, network, prefixLength) {
  const wholeBytes = prefixLength >> 3;
  for (let i = 0; i < wholeBytes; i += 1) {
    if (bytes[i] !== network[i]) return false;
  }
  const remainingBits = prefixLength & 7;
  if (remainingBits === 0) return true;
  const mask = (0xff << (8 - remainingBits)) & 0xff;
  return (bytes[wholeBytes] & mask) === (network[wholeBytes] & mask);
}

/**
 * Report whether `ip` is an IP literal inside one of BLOCKED_IP_RANGES.
 *
 * The input is parsed as an address rather than compared by string prefix, so
 * ordinary hostnames (e.g. "fdic.gov", "10.example.com") are never mistaken
 * for private addresses, and every address inside a CIDR range is covered.
 * Bracketed IPv6 literals ("[::1]") and IPv4-mapped IPv6 addresses
 * ("::ffff:127.0.0.1") are recognised. IPv4 input is expected in dotted-quad
 * form; shorthand such as "127.1" is treated as a non-IP string.
 *
 * @param {string} ip - IP literal or hostname.
 * @returns {boolean} True if `ip` is an address in a blocked range; false for
 *   public addresses and for anything that is not an IP literal.
 */
function isPrivateIP(ip) {
  const literal = ip.startsWith("[") && ip.endsWith("]") ? ip.slice(1, -1) : ip;

  let bytes = parseIPv4(literal) ?? parseIPv6(literal);
  if (bytes === null) return false;

  // ::ffff:a.b.c.d reaches the IPv4 host a.b.c.d, so judge it by the IPv4 rules.
  const isIPv4Mapped =
    bytes.length === 16 &&
    bytes.slice(0, 10).every((byte) => byte === 0) &&
    bytes[10] === 0xff &&
    bytes[11] === 0xff;
  if (isIPv4Mapped) bytes = bytes.slice(12);

  return BLOCKED_IP_RANGES.some(
    (range) =>
      range.bytes.length === bytes.length &&
      hasPrefix(bytes, range.bytes, range.prefixLength),
  );
}
50+
51+
// Opt-in escape hatch for reaching private/internal hosts: enabled either by
// the --allow-local CLI flag or by setting ALLOW_LOCAL_NETWORK=true.
const hasAllowLocalFlag = process.argv.includes("--allow-local");
const hasAllowLocalEnv = process.env.ALLOW_LOCAL_NETWORK === "true";
const allowLocalNetwork = hasAllowLocalFlag || hasAllowLocalEnv;
54+
55+
/**
 * Validate a user-supplied URL before it is fetched (SSRF guard, CWE-918).
 *
 * Checks, in order: the string parses as a URL; the scheme is http or https;
 * and, unless local network access was opted into, neither the literal host
 * nor ANY address the hostname resolves to is private/internal.
 *
 * NOTE(review): this validates the name at check time only. The later fetch
 * resolves the hostname again and may follow redirects, so DNS rebinding and
 * redirect-based bypasses need to be handled at the fetch call site.
 *
 * @param {string} urlString - URL to validate.
 * @returns {Promise<URL>} The parsed URL when it is allowed.
 * @throws {Error} If the URL is malformed, uses a non-HTTP(S) scheme, points
 *   at a private/internal address, or its hostname cannot be resolved.
 */
async function validateUrl(urlString) {
  let parsed;
  try {
    parsed = new URL(urlString);
  } catch {
    throw new Error("Invalid URL format");
  }

  // Only allow http and https schemes
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error(`URL scheme '${parsed.protocol.slice(0, -1)}' is not allowed. Only http and https are supported.`);
  }

  // Private-address checks are skipped entirely when the operator opted out.
  if (allowLocalNetwork) return parsed;

  const privateAddressMessage =
    "URLs pointing to private or internal network addresses are not allowed. Use --allow-local flag to permit local network access.";

  // URL.hostname keeps the brackets around IPv6 literals ("[::1]"); strip them
  // so both the literal check and the resolver see the bare address.
  const hostname = parsed.hostname.replace(/^\[(.*)\]$/, "$1");

  // Check if hostname is already an IP literal
  if (isPrivateIP(hostname)) {
    throw new Error(privateAddressMessage);
  }

  // DNS resolution check. Only the lookup itself is inside the try block, so
  // a resolver failure can never be confused with a policy rejection.
  let addresses;
  try {
    addresses = await lookup(hostname, { all: true });
  } catch (err) {
    throw new Error(`Could not resolve hostname: ${parsed.hostname}`, { cause: err });
  }
  if (addresses.length === 0) {
    throw new Error(`Could not resolve hostname: ${parsed.hostname}`);
  }

  // Every record must be public: a name with mixed public/private records
  // could otherwise pass here and still be fetched from its private address.
  if (addresses.some(({ address }) => isPrivateIP(address))) {
    throw new Error(privateAddressMessage);
  }

  return parsed;
}
1391

1492
// Get package version for user-agent
1593
const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -124,8 +202,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
124202
let pageUrl = url;
125203
let pageTitle = null;
126204

127-
// If URL is provided, fetch the HTML
205+
// If URL is provided, validate and fetch the HTML
128206
if (url) {
207+
await validateUrl(url);
129208
console.error(`Fetching HTML from: ${url}`);
130209
const response = await fetch(url, {
131210
headers: {

test/html-to-markdown.test.js

Lines changed: 132 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,31 @@ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
44
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
55
import { readFile, unlink } from "fs/promises";
66
import { existsSync } from "fs";
7+
import { createServer } from "http";
78

89
describe("HTML to Markdown MCP Server", () => {
910
let client;
1011
let transport;
12+
let httpServer;
13+
let httpPort;
1114

1215
before(async () => {
13-
// Create client and connect
16+
// Start a local HTTP server for URL fetch tests
17+
httpServer = createServer((req, res) => {
18+
res.writeHead(200, { "Content-Type": "text/html" });
19+
res.end("<html><head><title>Test Page</title></head><body><h1>Hello from local server</h1><p>Test content</p></body></html>");
20+
});
21+
await new Promise((resolve) => {
22+
httpServer.listen(0, "127.0.0.1", () => {
23+
httpPort = httpServer.address().port;
24+
resolve();
25+
});
26+
});
27+
28+
// Create client and connect (--allow-local so URL fetch test can reach local server)
1429
transport = new StdioClientTransport({
1530
command: "node",
16-
args: ["./index.js"],
31+
args: ["./index.js", "--allow-local"],
1732
});
1833

1934
client = new Client(
@@ -32,6 +47,7 @@ describe("HTML to Markdown MCP Server", () => {
3247
after(async () => {
3348
// Cleanup
3449
await client.close();
50+
httpServer.close();
3551

3652
// Remove test files
3753
const testFiles = [
@@ -87,17 +103,18 @@ describe("HTML to Markdown MCP Server", () => {
87103
});
88104

89105
it("should fetch and convert URL", async () => {
106+
const url = `http://127.0.0.1:${httpPort}/test-page`;
90107
const result = await client.callTool({
91108
name: "html_to_markdown",
92109
arguments: {
93-
url: "https://example.com",
110+
url,
94111
includeMetadata: true,
95112
},
96113
});
97114

98115
assert.strictEqual(result.content[0].type, "text");
99-
assert.ok(result.content[0].text.includes("Example Domain"));
100-
assert.ok(result.content[0].text.includes("**Source:** https://example.com"));
116+
assert.ok(result.content[0].text.includes("Hello from local server"));
117+
assert.ok(result.content[0].text.includes(`**Source:** ${url}`));
101118
});
102119

103120
it("should include metadata when requested", async () => {
@@ -231,6 +248,116 @@ describe("HTML to Markdown MCP Server", () => {
231248
});
232249
});
233250

251+
describe("SSRF protection", () => {
  let ssrfClient;
  let ssrfTransport;

  // Each entry is one URL that must be rejected, plus the fragment the
  // resulting error message must contain.
  const rejectedUrls = [
    { title: "should block loopback URLs", url: "http://127.0.0.1:9999/ssrf", fragment: "private or internal" },
    { title: "should block localhost URLs", url: "http://localhost:9999/ssrf", fragment: "private or internal" },
    { title: "should block cloud metadata endpoint", url: "http://169.254.169.254/latest/meta-data/", fragment: "private or internal" },
    { title: "should block RFC 1918 private ranges (10.x)", url: "http://10.0.0.1/admin", fragment: "private or internal" },
    { title: "should block RFC 1918 private ranges (192.168.x)", url: "http://192.168.1.1/admin", fragment: "private or internal" },
    { title: "should block RFC 1918 private ranges (172.16-31.x)", url: "http://172.16.0.1/admin", fragment: "private or internal" },
    { title: "should block file:// scheme", url: "file:///etc/passwd", fragment: "not allowed" },
    { title: "should block ftp:// scheme", url: "ftp://internal.server/data", fragment: "not allowed" },
  ];

  // Invoke the conversion tool on the server that runs WITHOUT --allow-local.
  const convert = (toolArguments) =>
    ssrfClient.callTool({
      name: "html_to_markdown",
      arguments: toolArguments,
    });

  before(async () => {
    // Separate server instance WITHOUT --allow-local for SSRF tests
    ssrfTransport = new StdioClientTransport({
      command: "node",
      args: ["./index.js"],
    });
    ssrfClient = new Client(
      { name: "ssrf-test-client", version: "1.0.0" },
      { capabilities: {} }
    );
    await ssrfClient.connect(ssrfTransport);
  });

  after(async () => {
    await ssrfClient.close();
  });

  for (const { title, url, fragment } of rejectedUrls) {
    it(title, async () => {
      const result = await convert({ url });
      assert.strictEqual(result.isError, true);
      assert.ok(result.content[0].text.includes(fragment));
    });
  }

  it("should allow valid public URLs (not blocked by SSRF check)", async () => {
    // Only validation is under test here: the fetch itself may fail in CI
    // (network/TLS), so an error is acceptable as long as it is NOT an
    // SSRF or scheme rejection.
    const result = await convert({ url: "https://example.com", includeMetadata: false });
    if (!result.isError) return;

    const message = result.content[0].text;
    assert.ok(!message.includes("private or internal"),
      "Public URL should not be blocked by SSRF protection");
    assert.ok(!message.includes("not allowed"),
      "HTTPS scheme should be allowed");
  });
});
360+
234361
describe("Workflow Integration", () => {
235362
it("should convert HTML and save to file", async () => {
236363
// First, convert HTML to markdown

0 commit comments

Comments
 (0)