Skip to content

Commit fb190c5

Browse files
committed
fix: re-work resource interface to work with api.apify.com
resources are now pulled from the api.apify.com, and large binaries return signed resource links
1 parent 916fb21 commit fb190c5

9 files changed

Lines changed: 523 additions & 700 deletions

File tree

src/resources/AGENTS.md

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,30 +8,33 @@ Three files serving the MCP `resources/*` surface:
88
- `resource_service.ts` — handles `ListResources` / `ListResourceTemplates` /
99
read-resource requests. Takes an optional `apifyClient` on list/read; the server
1010
builds it from the per-request token (`_meta.apifyToken || options.token`).
11-
- `storage_resources.ts` — exposes Apify storage **data reads** as resources
12-
(alongside, not replacing, the storage tools).
11+
- `api_resources.ts` — a thin MCP-resource proxy over the Apify API: any Apify API GET
12+
endpoint is readable as a resource, identified by its real API URL.
1313
- `widgets.ts` — the registry of UI widgets (the metadata that maps a widget name to
1414
its resource); the widgets themselves are built in [`../web`](../web/AGENTS.md).
1515

16-
## Storage resources (`storage_resources.ts`)
17-
18-
Custom `apify://` scheme, parsed by stripping the prefix, splitting the path on `/`,
19-
and reading the query with `URLSearchParams` (the `recordKey` is URL-decoded). Three
20-
templates (`resources/templates/list`):
21-
22-
- `apify://datasets/{datasetId}/items{?offset,limit,fields,omit,clean,desc}` — dataset items
23-
- `apify://key-value-stores/{keyValueStoreId}/keys{?exclusiveStartKey,limit}` — KVS key listing
24-
- `apify://key-value-stores/{keyValueStoreId}/records/{recordKey}` — a single KVS record
25-
26-
`resources/list` adds concrete URIs for the user's recent datasets/stores
27-
(`desc: true`, bounded). Contents are `application/json` for items/keys; records keep
28-
their `contentType` (binary → base64 `blob`). A binary record over
29-
`KV_RECORD_MAX_INLINE_BYTES` (256 KB) links out instead of inlining: a JSON text block
30-
with the record's public URL (`resources/read` has no `resource_link` content type),
31-
mirroring the `get-key-value-store-record` tool. Best-effort: no token / API error →
32-
list omits storage; an unreadable read returns an explanatory `text` block, never an
33-
error. Reuses the storage tools' arg-parsing helpers and 404→soft-fail pattern; it
34-
does **not** share their response builders (resources need `ReadResourceResult`).
16+
## API resources (`api_resources.ts`)
17+
18+
Resource URIs are real Apify API GET URLs (`https://api.apify.com/v2/...`), so URLs that
19+
Actors and tools return in their responses can be read back verbatim — no scheme to
20+
translate. `isApifyApiUri()` gates reads to the configured API origin
21+
(`getApifyAPIBaseUrl()`): the apify-client attaches the session token as an `Authorization`
22+
header to **every** outbound request, so we must never hand it a non-Apify host.
23+
24+
`readApiResource()` is a generic proxy: `apifyClient.httpClient.call({ method: 'GET', responseType: 'arraybuffer' })`
25+
(do **not** set `forceBuffer` — that skips the client's Content-Type parsing). The parsed
26+
body is JSON → object, text/xml → string, anything else → `Buffer`, empty → `undefined`;
27+
we branch on that JS type, not the MIME type. Buffers over `KV_RECORD_MAX_INLINE_BYTES`
28+
(256 KB) link out — a JSON text block with the URL + size + type (`resources/read` has no
29+
`resource_link` content type) — instead of inlining base64. For a KVS record the URL is the
30+
store's signed `recordPublicUrl` (fetchable without a token); other endpoints fall back to the
31+
token-gated API URL. Errors never throw: a missing resource, bad token, or 5xx returns an
32+
explanatory `text` block.
33+
34+
`resources/templates/list` advertises three common starting points (dataset items, KVS
35+
keys, KVS record); `resources/list` adds the user's recent datasets/stores as concrete API
36+
URLs (`desc: true`, bounded, dataset URI carries a default `limit`). Both are best-effort:
37+
no token / API error → those entries are omitted.
3538

3639
## Gotcha
3740

src/resources/api_resources.ts

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
import type {
2+
BlobResourceContents,
3+
ReadResourceResult,
4+
Resource,
5+
ResourceTemplate,
6+
TextResourceContents,
7+
} from '@modelcontextprotocol/sdk/types.js';
8+
9+
import type { ApifyClient } from '../apify_client.js';
10+
import { getApifyAPIBaseUrl } from '../apify_client.js';
11+
import { KV_RECORD_MAX_INLINE_BYTES } from '../const.js';
12+
import { getHttpStatusCode } from '../utils/logging.js';
13+
14+
/** MIME type attached to resource contents that carry JSON. */
const JSON_MIME_TYPE = 'application/json';
/** MIME type attached to plain-text resource contents (explanatory / fallback blocks). */
const TEXT_MIME_TYPE = 'text/plain';

/** Upper bound on how many recent datasets / key-value stores resources/list surfaces. */
const RECENT_LIST_LIMIT = 10;
/**
 * Page size baked into the advertised dataset-items URI, so that a naive read of the
 * listed URL does not pull a whole dataset.
 */
const DEFAULT_DATASET_ITEMS_LIMIT = 20;
20+
21+
/**
 * Templates served by resources/templates/list. Each one is a real Apify API GET URL
 * written as an RFC 6570 URI template. Because the read path proxies any Apify API GET
 * endpoint, this is only a set of common, discoverable entry points — not a complete
 * catalogue of what can be read.
 */
export const API_RESOURCE_TEMPLATES: ResourceTemplate[] = ((): ResourceTemplate[] => {
    // Prefix a path template with the configured API base URL (resolved once per template).
    const apiUrl = (pathTemplate: string): string => `${getApifyAPIBaseUrl()}${pathTemplate}`;

    const datasetItems: ResourceTemplate = {
        uriTemplate: apiUrl('/v2/datasets/{datasetId}/items{?format,clean,offset,limit,fields,omit,desc}'),
        name: 'Dataset items',
        description: 'Items of an Apify dataset, paginated and field-selectable.',
        mimeType: JSON_MIME_TYPE,
    };
    const keyValueStoreKeys: ResourceTemplate = {
        uriTemplate: apiUrl('/v2/key-value-stores/{keyValueStoreId}/keys{?exclusiveStartKey,limit}'),
        name: 'Key-value store keys',
        description: 'Keys in an Apify key-value store, cursor-paginated.',
        mimeType: JSON_MIME_TYPE,
    };
    // No fixed mimeType here: a record may be text, JSON, or binary.
    const keyValueStoreRecord: ResourceTemplate = {
        uriTemplate: apiUrl('/v2/key-value-stores/{keyValueStoreId}/records/{recordKey}'),
        name: 'Key-value store record',
        description: 'A single record (text, JSON, or binary) from an Apify key-value store.',
    };

    return [datasetItems, keyValueStoreKeys, keyValueStoreRecord];
})();
45+
46+
/**
 * Reports whether `uri` points at the Apify API, i.e. shares its origin (scheme, host and
 * port) with the configured API base URL.
 *
 * This check is the security gate of the generic read proxy. The apify-client sends the
 * session token in an `Authorization` header with every outbound request, so only Apify
 * API URLs may ever be passed to it — never an arbitrary host.
 *
 * @param uri - Candidate resource URI.
 * @returns `true` for a same-origin Apify API URL; `false` otherwise, including when
 * either URL cannot be parsed.
 */
export function isApifyApiUri(uri: string): boolean {
    let candidateOrigin: string;
    let apiOrigin: string;
    try {
        candidateOrigin = new URL(uri).origin;
        apiOrigin = new URL(getApifyAPIBaseUrl()).origin;
    } catch {
        // Unparseable input can never be an Apify API URL.
        return false;
    }
    return candidateOrigin === apiOrigin;
}
60+
61+
/**
 * List the user's recent datasets and key-value stores as concrete Apify API URLs.
 *
 * Best-effort: returns `[]` when there is no client, and silently omits a storage kind
 * whose listing call fails, so the overall resources/list still serves widgets and the
 * usage guide.
 *
 * The two listings do not depend on each other, so they are requested concurrently
 * rather than one after the other; the result order is unchanged (datasets first, then
 * key-value stores).
 *
 * @param apifyClient - Client for the current session; when omitted nothing is listed.
 * @returns Dataset resources (items URL with a default `limit`) followed by
 * key-value-store resources (keys URL).
 */
export async function listStorageResources(apifyClient?: ApifyClient): Promise<Resource[]> {
    if (!apifyClient) {
        return [];
    }
    // Bind to a const so the non-undefined narrowing survives inside the closures below.
    const client = apifyClient;
    const base = getApifyAPIBaseUrl();

    /** Recent datasets as resources; `[]` if the API call fails. */
    const listRecentDatasets = async (): Promise<Resource[]> => {
        try {
            const datasets = await client.datasets().list({ limit: RECENT_LIST_LIMIT, desc: true });
            return datasets.items.map((dataset): Resource => ({
                uri: `${base}/v2/datasets/${dataset.id}/items?limit=${DEFAULT_DATASET_ITEMS_LIMIT}`,
                name: dataset.name ?? dataset.id,
                description: `Dataset with ${dataset.itemCount} item(s).`,
                mimeType: JSON_MIME_TYPE,
            }));
        } catch {
            // Ignore: best-effort listing.
            return [];
        }
    };

    /** Recent key-value stores as resources; `[]` if the API call fails. */
    const listRecentStores = async (): Promise<Resource[]> => {
        try {
            const stores = await client.keyValueStores().list({ limit: RECENT_LIST_LIMIT, desc: true });
            return stores.items.map((store): Resource => ({
                uri: `${base}/v2/key-value-stores/${store.id}/keys`,
                name: store.name ?? store.id,
                description: 'Key-value store.',
                mimeType: JSON_MIME_TYPE,
            }));
        } catch {
            // Ignore: best-effort listing.
            return [];
        }
    };

    // Neither helper rejects (each swallows its own failure), so Promise.all cannot reject here.
    const [datasetResources, storeResources] = await Promise.all([listRecentDatasets(), listRecentStores()]);
    return [...datasetResources, ...storeResources];
}
100+
101+
/**
 * Path of an Apify key-value-store record endpoint. Capture group 1 is the store id and
 * group 2 is the record key (everything after `/records/`).
 */
const KV_RECORD_PATH_RE = /^\/v2\/key-value-stores\/([^/]+)\/records\/(.+)$/;

/**
 * Picks the URL to hand out for a binary that is too large to inline.
 *
 * When `uri` is a key-value-store record URL, the store's signed `recordPublicUrl` is
 * requested from the client; per the design notes that link is fetchable without an API
 * token when the client can read the store's URL signing key. For every other endpoint,
 * for an unparseable `uri`, or when obtaining the signed URL fails, the original `uri` is
 * returned unchanged (fetching that link then needs a token).
 *
 * @param uri - The API URL that was read.
 * @param apifyClient - Client used to obtain the signed public URL.
 * @returns The signed public record URL, or `uri` itself as the fallback.
 */
async function getRecordDownloadUrl(uri: string, apifyClient: ApifyClient): Promise<string> {
    try {
        const recordMatch = KV_RECORD_PATH_RE.exec(new URL(uri).pathname);
        if (recordMatch === null) {
            return uri;
        }
        const [, encodedStoreId, encodedRecordKey] = recordMatch;
        // Path segments arrive percent-encoded; the client expects the decoded id / key.
        const recordStore = apifyClient.keyValueStore(decodeURIComponent(encodedStoreId));
        return await recordStore.getRecordPublicUrl(decodeURIComponent(encodedRecordKey));
    } catch {
        // Bad URL, malformed percent-encoding, or a failed signing call: fall back to the API URL.
        return uri;
    }
}
126+
127+
/**
 * Wraps a message in a read result holding exactly one plain-text contents entry.
 * Used for not-found / no-token / refused reads and for empty bodies.
 *
 * @param uri - The resource URI the contents entry is attributed to.
 * @param text - The text to return to the client.
 */
function buildTextResult(uri: string, text: string): ReadResourceResult {
    const textContents: TextResourceContents = { uri, mimeType: TEXT_MIME_TYPE, text };
    return { contents: [textContents] };
}
131+
132+
/**
 * Read any Apify API GET endpoint as an MCP resource.
 *
 * Acts as a thin proxy. The apify-client adds the session token (plus MCP-origin / payment
 * headers), issues the GET, and decodes the body according to its Content-Type: JSON
 * becomes an object, text/xml a string, anything else a Buffer, and an empty body
 * `undefined`. The result is chosen from that JS type rather than from the MIME type.
 *
 * Failures of the request itself (missing resource, bad token, 5xx) are not thrown; they
 * come back as an explanatory text block, per the resources/read soft-fail contract.
 *
 * @param uri - Resource URI; must be an Apify API URL (see `isApifyApiUri`).
 * @param apifyClient - Client for the current session; without it the read is refused.
 * @returns Text contents for JSON / text / empty bodies and for refusals and errors, blob
 * contents for small binaries, or a JSON link-out block for binaries over the inline limit.
 */
export async function readApiResource(uri: string, apifyClient?: ApifyClient): Promise<ReadResourceResult> {
    if (!apifyClient) {
        return buildTextResult(uri, `Cannot read ${uri}: no Apify token in this session.`);
    }
    if (!isApifyApiUri(uri)) {
        return buildTextResult(
            uri,
            `Cannot read ${uri}: only Apify API URLs (${getApifyAPIBaseUrl()}) are readable as resources.`,
        );
    }

    let apiResponse: { data: unknown; headers: Record<string, unknown> };
    try {
        // `arraybuffer` lets the client's parse interceptor decode the body by Content-Type.
        // Do NOT set `forceBuffer` — that would keep everything as raw bytes.
        apiResponse = await apifyClient.httpClient.call({ url: uri, method: 'GET', responseType: 'arraybuffer' });
    } catch (error) {
        const statusCode = getHttpStatusCode(error);
        const reason = error instanceof Error ? error.message : String(error);
        const statusPrefix = statusCode ? `HTTP ${statusCode}: ` : '';
        return buildTextResult(uri, `Failed to read ${uri}: ${statusPrefix}${reason}`);
    }

    const rawContentType = apiResponse.headers['content-type'];
    const contentType = typeof rawContentType === 'string' ? rawContentType : undefined;
    const body = apiResponse.data;

    // An empty body (e.g. an Actor that wrote an empty OUTPUT) is legitimate; emit empty text.
    if (body == null) {
        return buildTextResult(uri, '');
    }

    if (Buffer.isBuffer(body)) {
        // Bare media type: drop parameters such as `; charset=...` and normalise case.
        const mimeType = contentType?.split(';')[0].trim().toLowerCase();
        const mimeTypeField = mimeType ? { mimeType } : {};

        if (body.length <= KV_RECORD_MAX_INLINE_BYTES) {
            const blobContents = {
                uri,
                ...mimeTypeField,
                blob: body.toString('base64'),
            } satisfies BlobResourceContents;
            return { contents: [blobContents] };
        }

        // A large binary inlined as base64 would blow up the client's context, so link out
        // instead: a JSON text block with the URL, size, and type (resources/read has no
        // resource_link content type). For a key-value-store record the link is the signed
        // public URL; other endpoints fall back to the (token-gated) API URL.
        const downloadUrl = await getRecordDownloadUrl(uri, apifyClient);
        const linkContents = {
            uri,
            mimeType: JSON_MIME_TYPE,
            text: JSON.stringify({ uri: downloadUrl, size: body.length, ...mimeTypeField }),
        } satisfies TextResourceContents;
        return { contents: [linkContents] };
    }

    // A string (text/xml) is passed through verbatim under its declared Content-Type.
    if (typeof body === 'string') {
        const stringContents = {
            uri,
            mimeType: contentType ?? TEXT_MIME_TYPE,
            text: body,
        } satisfies TextResourceContents;
        return { contents: [stringContents] };
    }

    // Anything else was already parsed from JSON; serialise it back losslessly.
    const jsonContents = {
        uri,
        mimeType: contentType ?? JSON_MIME_TYPE,
        text: JSON.stringify(body),
    } satisfies TextResourceContents;
    return { contents: [jsonContents] };
}

src/resources/resource_service.ts

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,7 @@ import log from '@apify/log';
1111
import type { ApifyClient } from '../apify_client.js';
1212
import type { PaymentProvider } from '../payments/types.js';
1313
import { ServerMode } from '../types.js';
14-
import {
15-
isStorageUri,
16-
listStorageResources,
17-
readStorageResource,
18-
STORAGE_RESOURCE_TEMPLATES,
19-
} from './storage_resources.js';
14+
import { API_RESOURCE_TEMPLATES, isApifyApiUri, listStorageResources, readApiResource } from './api_resources.js';
2015
import type { AvailableWidget } from './widgets.js';
2116
import { RESOURCE_MIME_TYPE } from './widgets.js';
2217

@@ -86,9 +81,9 @@ export function createResourceService(options: ResourceServiceOptions): Resource
8681
};
8782

8883
const readResource = async (uri: string, apifyClient?: ApifyClient): Promise<ExtendedReadResourceResult> => {
89-
if (isStorageUri(uri)) {
90-
// Storage contents carry no widget `_meta`/`html`; the extended shape only adds optional fields.
91-
return (await readStorageResource(uri, apifyClient)) as ExtendedReadResourceResult;
84+
if (isApifyApiUri(uri)) {
85+
// API contents carry no widget `_meta`/`html`; the extended shape only adds optional fields.
86+
return (await readApiResource(uri, apifyClient)) as ExtendedReadResourceResult;
9287
}
9388

9489
const usageGuide = paymentProvider?.getUsageGuide?.();
@@ -173,7 +168,7 @@ export function createResourceService(options: ResourceServiceOptions): Resource
173168
};
174169

175170
const listResourceTemplates = async (): Promise<ListResourceTemplatesResult> => ({
176-
resourceTemplates: STORAGE_RESOURCE_TEMPLATES,
171+
resourceTemplates: API_RESOURCE_TEMPLATES,
177172
});
178173

179174
return {

0 commit comments

Comments
 (0)