Skip to content

Commit 7c154e2

Browse files
committed
feat(pdf-server): Normalize arxiv URLs to PDF format
Rewrites arxiv.org/abs/... URLs to arxiv.org/pdf/... The normalization is applied both at startup (to URLs passed on the command line) and when loading dynamic URLs.
1 parent 02f173d commit 7c154e2

2 files changed

Lines changed: 18 additions & 9 deletions

File tree

examples/pdf-server/server.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import fs from "node:fs/promises";
1919
import path from "node:path";
2020
import { z } from "zod";
2121

22-
import { buildPdfIndex, findEntryByUrl, createEntry, isArxivUrl, isFileUrl, toFileUrl } from "./src/pdf-indexer.js";
22+
import { buildPdfIndex, findEntryByUrl, createEntry, isArxivUrl, isFileUrl, toFileUrl, normalizeArxivUrl } from "./src/pdf-indexer.js";
2323
import { loadPdfBytesChunk, populatePdfMetadata } from "./src/pdf-loader.js";
2424
import { ReadPdfBytesInputSchema, PdfBytesChunkSchema, type PdfIndex } from "./src/types.js";
2525
import { startServer } from "./server-utils.js";
@@ -86,13 +86,15 @@ export function createServer(): McpServer {
8686
}),
8787
_meta: { ui: { resourceUri: RESOURCE_URI } },
8888
},
89-
async ({ url, page }): Promise<CallToolResult> => {
89+
async ({ url: rawUrl, page }): Promise<CallToolResult> => {
9090
if (!pdfIndex) throw new Error("Not initialized");
9191

92+
// Normalize arxiv URLs to PDF format
93+
const url = isArxivUrl(rawUrl) ? normalizeArxivUrl(rawUrl) : rawUrl;
94+
9295
let entry = findEntryByUrl(pdfIndex, url);
9396

9497
if (!entry) {
95-
// Dynamic loading: only arxiv.org allowed
9698
if (isFileUrl(url)) {
9799
throw new Error("File URLs must be in the initial list");
98100
}
@@ -144,12 +146,14 @@ function parseArgs(): { urls: string[]; stdio: boolean } {
144146
if (arg === "--stdio") {
145147
stdio = true;
146148
} else if (!arg.startsWith("-")) {
147-
// Convert local paths to file:// URLs
148-
if (arg.startsWith("http://") || arg.startsWith("https://") || arg.startsWith("file://")) {
149-
urls.push(arg);
150-
} else {
151-
urls.push(toFileUrl(arg));
149+
// Convert local paths to file:// URLs, normalize arxiv URLs
150+
let url = arg;
151+
if (!arg.startsWith("http://") && !arg.startsWith("https://") && !arg.startsWith("file://")) {
152+
url = toFileUrl(arg);
153+
} else if (isArxivUrl(arg)) {
154+
url = normalizeArxivUrl(arg);
152155
}
156+
urls.push(url);
153157
}
154158
}
155159

examples/pdf-server/src/pdf-indexer.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,12 @@ import { populatePdfMetadata } from "./pdf-loader.js";
77

88
/** Check if URL is from arxiv.org */
99
export function isArxivUrl(url: string): boolean {
10-
return url.startsWith("https://arxiv.org/") || url.startsWith("http://arxiv.org/");
10+
return /^https?:\/\/arxiv\.org\//.test(url);
11+
}
12+
13+
/** Normalize arxiv URL to PDF format */
14+
export function normalizeArxivUrl(url: string): string {
15+
return url.replace(/arxiv\.org\/abs\//, "arxiv.org/pdf/");
1116
}
1217

1318
/** Check if URL is a file:// URL */

0 commit comments

Comments
 (0)