Skip to content

Commit 498ae9a

Browse files
committed
fix: validate parsed URL to prevent path-traversal SSRF in wiki explorer
Replace raw regex matching on the URL string with parsed URL validation using the URL constructor. This prevents path-traversal attacks where URLs like `/wiki/../../w/api.php` pass the regex but resolve to paths outside `/wiki/` after URL normalization. Also set `redirect: "error"` on fetch() to prevent following redirects to non-Wikipedia domains. CWE-918
1 parent 0008d3b commit 498ae9a

2 files changed

Lines changed: 143 additions & 2 deletions

File tree

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import { describe, it, expect, beforeEach, afterEach, spyOn } from "bun:test";
2+
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
3+
import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
4+
import { createServer } from "./server";
5+
6+
/**
 * Extract the text payload of the first content item in a tool result.
 *
 * @param r - Result returned by `client.callTool(...)`.
 * @returns The `text` field of the first content item.
 * @throws Error when the result has no content items or the first item does
 *   not carry a string `text` field, so a malformed result fails with a clear
 *   message instead of an opaque `TypeError` or a later `JSON.parse` failure.
 */
function firstText(r: Awaited<ReturnType<Client["callTool"]>>): string {
  const items = r.content as Array<{ type: string; text?: unknown }> | undefined;
  const first = items?.[0];
  if (first === undefined || typeof first.text !== "string") {
    throw new Error("Tool result has no text content in its first item");
  }
  return first.text;
}
9+
10+
// End-to-end checks of the `get-first-degree-links` tool's URL validation,
// driven through an in-memory MCP client/server pair.
describe("wiki-explorer URL validation", () => {
  let server: ReturnType<typeof createServer>;
  let client: Client;

  beforeEach(async () => {
    server = createServer();
    client = new Client({ name: "test", version: "1" });
    const [ct, st] = InMemoryTransport.createLinkedPair();
    await Promise.all([server.connect(st), client.connect(ct)]);
  });

  afterEach(async () => {
    await client.close();
    await server.close();
  });

  /** Build a minimal 200 text/html response for the mocked `fetch`. */
  function htmlResponse(body: string): Response {
    return new Response(body, {
      status: 200,
      headers: { "Content-Type": "text/html" },
    });
  }

  /** Call the tool with `url` and return its parsed JSON payload. */
  async function getLinks(url: string) {
    const r = await client.callTool({
      name: "get-first-degree-links",
      arguments: { url },
    });
    return JSON.parse(firstText(r));
  }

  /**
   * Assert that `url` is rejected by validation and that no request is made.
   * `fetch` is stubbed so that a validation regression fails the assertions
   * here instead of sending a real request to the supplied URL.
   */
  async function expectRejectedWithoutFetch(url: string): Promise<void> {
    const fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(
      htmlResponse("<html><body></body></html>"),
    );

    try {
      const result = await getLinks(url);
      expect(result.error).toBe("Not a valid Wikipedia URL");
      expect(fetchSpy).not.toHaveBeenCalled();
    } finally {
      fetchSpy.mockRestore();
    }
  }

  it("rejects non-Wikipedia URLs", async () => {
    await expectRejectedWithoutFetch("https://evil.com/wiki/Test");
  });

  it("rejects path traversal that escapes /wiki/", async () => {
    // This URL passes the old regex but resolves to /w/api.php (outside /wiki/)
    await expectRejectedWithoutFetch(
      "https://en.wikipedia.org/wiki/../../w/api.php?action=query&list=allusers",
    );
  });

  it("rejects path traversal to API endpoints", async () => {
    await expectRejectedWithoutFetch(
      "https://en.wikipedia.org/wiki/../../../api/rest_v1/feed/featured/2024/01/01",
    );
  });

  it("accepts valid Wikipedia URLs", async () => {
    // Mock fetch to avoid real network requests
    const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce(
      htmlResponse("<html><body><a href='/wiki/Test'>Test</a></body></html>"),
    );

    try {
      const result = await getLinks(
        "https://en.wikipedia.org/wiki/Model_Context_Protocol",
      );
      expect(result.error).toBeNull();
      expect(result.page.url).toBe(
        "https://en.wikipedia.org/wiki/Model_Context_Protocol",
      );
    } finally {
      mockFetch.mockRestore();
    }
  });

  it("disables redirect following on fetch", async () => {
    // fetch must be called with redirect: "error" so that a redirect to a
    // non-Wikipedia domain fails the request instead of being followed.
    const mockFetch = spyOn(globalThis, "fetch").mockResolvedValueOnce(
      htmlResponse("<html><body></body></html>"),
    );

    try {
      await getLinks("https://en.wikipedia.org/wiki/Test_Page");
      expect(mockFetch).toHaveBeenCalledTimes(1);
      const fetchArgs = mockFetch.mock.calls[0];
      // Second argument is the RequestInit carrying the redirect policy
      expect(fetchArgs[1]).toBeDefined();
      expect((fetchArgs[1] as RequestInit).redirect).toBe("error");
    } finally {
      mockFetch.mockRestore();
    }
  });
});

examples/wiki-explorer-server/server.ts

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,37 @@ function extractTitleFromUrl(url: string): string {
3131
}
3232
}
3333

34+
/**
 * Validate that a URL points to a Wikipedia wiki page.
 *
 * Uses parsed URL components (not raw string matching) to prevent
 * path-traversal bypasses such as `/wiki/../../w/api.php`, which match a
 * prefix regex but normalize to a path outside `/wiki/`.
 *
 * @param url - Candidate URL string; treated as untrusted input.
 * @returns `true` only when the URL parses, uses http(s), carries no embedded
 *   credentials or non-default port, targets `<lang>.wikipedia.org`, and its
 *   normalized pathname starts with `/wiki/`; `false` otherwise.
 */
function isValidWikipediaUrl(url: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Not parseable as an absolute URL
    return false;
  }

  // Protocol must be HTTPS (or HTTP for dev, matching prior behaviour)
  if (parsed.protocol !== "https:" && parsed.protocol !== "http:") {
    return false;
  }

  // `hostname` excludes userinfo and port, so check them explicitly. The
  // earlier prefix regex never accepted `user:pass@host` or `host:8080`;
  // keep rejecting them so requests cannot be steered to other ports or
  // carry credentials.
  if (parsed.username !== "" || parsed.password !== "" || parsed.port !== "") {
    return false;
  }

  // Hostname must be <lang>.wikipedia.org. Language codes are lowercase ASCII
  // letters, optionally hyphen-separated (e.g. `zh-yue`, `be-tarask`).
  if (!/^[a-z]+(?:-[a-z]+)*\.wikipedia\.org$/.test(parsed.hostname)) {
    return false;
  }

  // After URL resolution, the pathname must still start with /wiki/
  return parsed.pathname.startsWith("/wiki/");
}
64+
3465
// Wikipedia namespace prefixes to exclude from link extraction
3566
const EXCLUDED_PREFIXES = [
3667
"Wikipedia:",
@@ -113,13 +144,13 @@ export function createServer(): McpServer {
113144
let title = url;
114145

115146
try {
116-
if (!url.match(/^https?:\/\/[a-z]+\.wikipedia\.org\/wiki\//)) {
147+
if (!isValidWikipediaUrl(url)) {
117148
throw new Error("Not a valid Wikipedia URL");
118149
}
119150

120151
title = extractTitleFromUrl(url);
121152

122-
const response = await fetch(url);
153+
const response = await fetch(url, { redirect: "error" });
123154

124155
if (!response.ok) {
125156
throw new Error(

0 commit comments

Comments
 (0)