|
| 1 | +import type { |
| 2 | + BlobResourceContents, |
| 3 | + ReadResourceResult, |
| 4 | + Resource, |
| 5 | + ResourceTemplate, |
| 6 | + TextResourceContents, |
| 7 | +} from '@modelcontextprotocol/sdk/types.js'; |
| 8 | + |
| 9 | +import type { ApifyClient } from '../apify_client.js'; |
| 10 | +import { getApifyAPIBaseUrl } from '../apify_client.js'; |
| 11 | +import { KV_RECORD_MAX_INLINE_BYTES } from '../const.js'; |
| 12 | +import { getHttpStatusCode } from '../utils/logging.js'; |
| 13 | + |
/** How many of the most recent datasets / key-value stores are surfaced in resources/list. */
const RECENT_LIST_LIMIT = 10;
/**
 * Page size baked into every advertised dataset-items URI, so that a naive read
 * fetches a single small page rather than a whole dataset.
 */
const DEFAULT_DATASET_ITEMS_LIMIT = 20;
/** MIME type used for JSON resource contents. */
const JSON_MIME_TYPE = 'application/json';
/** MIME type used for plain-text resource contents. */
const TEXT_MIME_TYPE = 'text/plain';
| 20 | + |
/**
 * Templates served by resources/templates/list.
 *
 * Each entry is a real Apify API GET URL written as an RFC 6570 template. Because the read
 * path proxies any Apify API GET endpoint, this list is only a set of common, discoverable
 * starting points and is not meant to be exhaustive.
 */
export const API_RESOURCE_TEMPLATES: ResourceTemplate[] = (() => {
    // Prefix a path template with the configured API base URL.
    const apiUrl = (pathTemplate: string): string => `${getApifyAPIBaseUrl()}${pathTemplate}`;

    const datasetItems: ResourceTemplate = {
        uriTemplate: apiUrl('/v2/datasets/{datasetId}/items{?format,clean,offset,limit,fields,omit,desc}'),
        name: 'Dataset items',
        description: 'Items of an Apify dataset, paginated and field-selectable.',
        mimeType: JSON_MIME_TYPE,
    };
    const keyValueStoreKeys: ResourceTemplate = {
        uriTemplate: apiUrl('/v2/key-value-stores/{keyValueStoreId}/keys{?exclusiveStartKey,limit}'),
        name: 'Key-value store keys',
        description: 'Keys in an Apify key-value store, cursor-paginated.',
        mimeType: JSON_MIME_TYPE,
    };
    // No mimeType here: a record may be text, JSON, or binary.
    const keyValueStoreRecord: ResourceTemplate = {
        uriTemplate: apiUrl('/v2/key-value-stores/{keyValueStoreId}/records/{recordKey}'),
        name: 'Key-value store record',
        description: 'A single record (text, JSON, or binary) from an Apify key-value store.',
    };

    return [datasetItems, keyValueStoreKeys, keyValueStoreRecord];
})();
| 45 | + |
/**
 * Tells whether `uri` points at the Apify API, i.e. shares its origin with the configured
 * API base URL.
 *
 * Security gate for the generic read proxy: the apify-client sends the session token in an
 * `Authorization` header on every outbound request, so only Apify API URLs may ever be
 * passed to it — never an arbitrary host.
 *
 * @param uri - Candidate resource URI.
 * @returns `true` when both URLs parse and their origins are equal; `false` otherwise,
 *          including when either URL cannot be parsed.
 */
export function isApifyApiUri(uri: string): boolean {
    let candidateOrigin: string;
    let apiOrigin: string;
    try {
        candidateOrigin = new URL(uri).origin;
        apiOrigin = new URL(getApifyAPIBaseUrl()).origin;
    } catch {
        // An unparseable URL can never be an Apify API URL.
        return false;
    }
    return candidateOrigin === apiOrigin;
}
| 60 | + |
/**
 * List the user's recent datasets and key-value stores as concrete Apify API URLs.
 *
 * Best-effort: returns `[]` when there is no client, and silently drops either source when
 * its API call fails, so the overall resources/list still serves widgets and the usage guide.
 * The two list calls are independent and run concurrently; datasets always precede
 * key-value stores in the result.
 *
 * @param apifyClient - Authenticated client, or `undefined` when the session has no token.
 * @returns Up to `RECENT_LIST_LIMIT` dataset resources followed by up to
 *          `RECENT_LIST_LIMIT` key-value store resources.
 */
export async function listStorageResources(apifyClient?: ApifyClient): Promise<Resource[]> {
    if (!apifyClient) {
        return [];
    }
    // Re-bind to a const so the non-undefined narrowing holds inside the closures below.
    const client = apifyClient;
    const base = getApifyAPIBaseUrl();

    // Recent datasets, each advertised as a first page of its items.
    const listDatasets = async (): Promise<Resource[]> => {
        const datasets = await client.datasets().list({ limit: RECENT_LIST_LIMIT, desc: true });
        return datasets.items.map((dataset): Resource => ({
            uri: `${base}/v2/datasets/${dataset.id}/items?limit=${DEFAULT_DATASET_ITEMS_LIMIT}`,
            name: dataset.name ?? dataset.id,
            description: `Dataset with ${dataset.itemCount} item(s).`,
            mimeType: JSON_MIME_TYPE,
        }));
    };

    // Recent key-value stores, each advertised as its key listing.
    const listStores = async (): Promise<Resource[]> => {
        const stores = await client.keyValueStores().list({ limit: RECENT_LIST_LIMIT, desc: true });
        return stores.items.map((store): Resource => ({
            uri: `${base}/v2/key-value-stores/${store.id}/keys`,
            name: store.name ?? store.id,
            description: 'Key-value store.',
            mimeType: JSON_MIME_TYPE,
        }));
    };

    // allSettled (not all): one failing source must not discard the other's results.
    const settled = await Promise.allSettled([listDatasets(), listStores()]);
    return settled.flatMap((outcome) => (outcome.status === 'fulfilled' ? outcome.value : []));
}
| 100 | + |
/** Path of an Apify key-value-store record; group 1 is the store id, group 2 the record key. */
const KV_RECORD_PATH_RE = /^\/v2\/key-value-stores\/([^/]+)\/records\/(.+)$/;

/**
 * Pick the URL to hand out for a binary that is too large to inline.
 *
 * When `uri` is a key-value-store record URL, the store's signed `recordPublicUrl` is
 * returned — fetchable without an API token when the client can read the store's URL
 * signing key. In every other case (a different endpoint, an unparseable URI, or a failure
 * while minting the signed URL) the original `uri` is returned unchanged; fetching that
 * link then needs a token.
 *
 * @param uri - The API URL that was read.
 * @param apifyClient - Client used to mint the signed record URL.
 * @returns The signed public URL, or `uri` itself as the fallback.
 */
async function getRecordDownloadUrl(uri: string, apifyClient: ApifyClient): Promise<string> {
    try {
        const recordPath = KV_RECORD_PATH_RE.exec(new URL(uri).pathname);
        if (recordPath === null) {
            return uri;
        }
        const [, encodedStoreId, encodedRecordKey] = recordPath;
        // Path segments arrive percent-encoded; the client expects the raw id and key.
        const storeClient = apifyClient.keyValueStore(decodeURIComponent(encodedStoreId));
        return await storeClient.getRecordPublicUrl(decodeURIComponent(encodedRecordKey));
    } catch {
        // Unparseable URI, malformed escape sequence, or signing failure: fall back.
        return uri;
    }
}
| 126 | + |
/**
 * Wrap a message in a read result holding exactly one plain-text contents entry.
 * Used for not-found / no-token / refused reads.
 *
 * @param uri - URI the contents entry is attributed to.
 * @param text - Text to return.
 */
function buildTextResult(uri: string, text: string): ReadResourceResult {
    const entry = { uri, mimeType: TEXT_MIME_TYPE, text } satisfies TextResourceContents;
    return { contents: [entry] };
}
| 131 | + |
/**
 * Read any Apify API GET endpoint as an MCP resource.
 *
 * This is a thin proxy. The apify-client injects the session token (and MCP-origin / payment
 * headers), performs the GET, and parses the body by Content-Type: JSON becomes an object,
 * text/xml a string, anything else a Buffer, and an empty body `undefined`. The result is
 * chosen from that parsed JS type, not from the MIME type.
 *
 * Errors (a missing resource, a bad token, a 5xx) never throw; they come back as an
 * explanatory text block, matching the resources/read soft-fail contract.
 *
 * @param uri - Resource URI; must share its origin with the configured Apify API base URL.
 * @param apifyClient - Authenticated client, or `undefined` when the session has no token.
 * @returns Text contents for JSON/text bodies and for refusals or failures, blob contents
 *          for a small binary, or a JSON pointer (`uri`, `size`, optional `mimeType`) for a
 *          binary above `KV_RECORD_MAX_INLINE_BYTES`.
 */
export async function readApiResource(uri: string, apifyClient?: ApifyClient): Promise<ReadResourceResult> {
    if (!apifyClient) {
        return buildTextResult(uri, `Cannot read ${uri}: no Apify token in this session.`);
    }
    if (!isApifyApiUri(uri)) {
        const refusal = `Cannot read ${uri}: only Apify API URLs (${getApifyAPIBaseUrl()}) are readable as resources.`;
        return buildTextResult(uri, refusal);
    }

    let body: unknown;
    let headers: Record<string, unknown>;
    try {
        // Default responseType is `arraybuffer`, which lets the client's parse interceptor decode
        // the body by Content-Type. Do NOT set `forceBuffer` — that would keep everything as raw bytes.
        ({ data: body, headers } = await apifyClient.httpClient.call({
            url: uri,
            method: 'GET',
            responseType: 'arraybuffer',
        }));
    } catch (err) {
        const status = getHttpStatusCode(err);
        const message = err instanceof Error ? err.message : String(err);
        const statusPrefix = status ? `HTTP ${status}: ` : '';
        return buildTextResult(uri, `Failed to read ${uri}: ${statusPrefix}${message}`);
    }

    const rawContentType = headers['content-type'];
    const contentType = typeof rawContentType === 'string' ? rawContentType : undefined;

    // An empty body (e.g. an Actor that wrote an empty OUTPUT) is legitimate; emit empty text.
    if (body == null) {
        return buildTextResult(uri, '');
    }

    if (!Buffer.isBuffer(body)) {
        // JSON (already parsed to an object/array) or text/xml (a string). A string is emitted
        // verbatim with its declared Content-Type; anything else is serialized as JSON.
        const isString = typeof body === 'string';
        const textContents = {
            uri,
            mimeType: contentType ?? (isString ? TEXT_MIME_TYPE : JSON_MIME_TYPE),
            text: isString ? (body as string) : JSON.stringify(body),
        } satisfies TextResourceContents;
        return { contents: [textContents] };
    }

    // Binary body: report the bare media type (parameters stripped, lower-cased) when known.
    const mimeType = contentType?.split(';')[0].trim().toLowerCase();
    const mimeTypeField = mimeType ? { mimeType } : {};

    if (body.length > KV_RECORD_MAX_INLINE_BYTES) {
        // Inlining a large binary as base64 would blow up the client's context, so link out
        // instead: a JSON text block with the URL, size, and type (resources/read has no
        // resource_link content type). For a key-value-store record the link is the signed
        // public URL, fetchable without a token; other endpoints fall back to the
        // (token-gated) API URL.
        const downloadUrl = await getRecordDownloadUrl(uri, apifyClient);
        const pointer = { uri: downloadUrl, size: body.length, ...mimeTypeField };
        const pointerContents = {
            uri,
            mimeType: JSON_MIME_TYPE,
            text: JSON.stringify(pointer),
        } satisfies TextResourceContents;
        return { contents: [pointerContents] };
    }

    const blobContents = {
        uri,
        ...mimeTypeField,
        blob: body.toString('base64'),
    } satisfies BlobResourceContents;
    return { contents: [blobContents] };
}
0 commit comments