Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions core-web/apps/mcp-server/src/tools/execute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ Tips:
- Use \`pick(arr, fields)\` to return only the fields you need — responses can be very large
- For file uploads use \`formData\` with \`{ name, type, data }\` (base64) or \`{ name, type, url }\` (remote URL)

Binary responses (file assets — images, fonts, PDFs, etc.):
- Endpoints that return non-text bodies (e.g. GET \`/api/v2/assets/{identifier}\` and \`/dA/{id}\`, content-type \`application/octet-stream\` or \`image/*\`) come back as an envelope: \`{ __dotcmsBinary: true, contentType, base64, byteLength }\`.
- The \`base64\` field IS the raw file bytes — base64-decode it to recover the exact file. Do NOT treat it as text; the bytes are intact (not UTF-8-mangled).
- JSON and textual responses (\`text/*\`, xml, js, \`+json\`/\`+xml\`) are returned as parsed objects / strings as before — only binary bodies use the envelope.

Block Editor (Story Block) fields:
- A Story Block field stores a string. When creating or updating content via a fire endpoint, send the field value as an **HTML or Markdown string** — do NOT hand-author the ProseMirror/JSON document. dotCMS stores it as-is and converts it to the Block Editor structure when the contentlet is opened in the editor.
- Example: \`{ "contentType": "Blog", "title": "My Post", "body": "<h2>Intro</h2><p>Hello <strong>world</strong>.</p>" }\` — where \`body\` is the Story Block field.
Expand Down
4 changes: 2 additions & 2 deletions core-web/libs/agentic-tools/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
export { Executor, createExecutor } from './lib/executor';
export type { ExecutorOptions } from './lib/executor';

export { createApiAdapter } from './lib/http-client';
export type { ApiAdapterConfig } from './lib/http-client';
export { createApiAdapter, isBinaryResponseEnvelope } from './lib/http-client';
export type { ApiAdapterConfig, BinaryResponseEnvelope } from './lib/http-client';

export { createSandbox } from './lib/sandbox';
export type { ISandbox, SandboxFactory } from './lib/sandbox/interface';
Expand Down
205 changes: 205 additions & 0 deletions core-web/libs/agentic-tools/src/lib/http-client.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import { createApiAdapter, isBinaryResponseEnvelope } from './http-client';

import type { Adapter, AdapterMethod } from './types';

/**
 * A real 1x1 red PNG. Its first byte is 0x89, which is not valid UTF-8 — the
 * exact kind of byte that `response.text()` corrupts into U+FFFD. This is the
 * regression fixture for the binary-response corruption bug.
 */
const PNG_BASE64 =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==';
// The fixture decoded to raw bytes; tests compare the adapter's output against
// this buffer byte-for-byte.
const PNG_BYTES = Buffer.from(PNG_BASE64, 'base64');

// Adapter configuration shared by every test. `fetch` is replaced with a jest
// mock in the suite below, so these are placeholder values only.
const CONFIG = { dotcmsUrl: 'https://example.dotcms.com', authToken: 'test-token' };

/**
 * Looks up the adapter's `request` method.
 *
 * @param adapter - Adapter whose `methods` map is queried.
 * @returns The method registered under the `request` key.
 * @throws Error when no `request` method is registered, so a test fails with a
 * clear message instead of a later `undefined` dereference.
 */
function getRequestMethod(adapter: Adapter): AdapterMethod {
    const requestMethod = adapter.methods.get('request');
    if (requestMethod) {
        return requestMethod;
    }
    throw new Error('request method not registered');
}

/**
 * Creates a minimal `Response` stand-in whose body is a fixed, in-memory buffer.
 *
 * Only the members read by the code under test are provided: `ok`, `status`,
 * `statusText`, `headers.get`, `json`, `text` and `arrayBuffer`.
 *
 * @param body - Body content; strings are encoded as UTF-8.
 * @param options - `contentType` is required. `ok`, `status` and `statusText`
 * default to a successful `200 OK`. `contentLength` overrides the
 * Content-Length header independently of the real body size, which lets a test
 * simulate a server that advertises an oversized response.
 * @returns The stub, cast to `Response`.
 */
function makeResponse(
    body: Buffer | string,
    options: {
        contentType: string;
        ok?: boolean;
        status?: number;
        statusText?: string;
        contentLength?: string;
    }
): Response {
    const { contentType, ok = true, status = 200, statusText = 'OK', contentLength } = options;
    const bytes = typeof body === 'string' ? Buffer.from(body, 'utf-8') : body;
    const decodeUtf8 = (): string => bytes.toString('utf-8');

    // Keys are lowercase; lookups lowercase the requested name to match.
    const headerValues: Record<string, string | null> = {
        'content-type': contentType,
        'content-length': contentLength ?? String(bytes.byteLength)
    };

    const stub = {
        ok,
        status,
        statusText,
        headers: {
            get(name: string) {
                return headerValues[name.toLowerCase()] ?? null;
            }
        },
        async json() {
            return JSON.parse(decodeUtf8());
        },
        async text() {
            return decodeUtf8();
        },
        async arrayBuffer() {
            // Copy only this buffer's window: a Node Buffer may be a view into
            // a larger backing ArrayBuffer.
            const start = bytes.byteOffset;
            return bytes.buffer.slice(start, start + bytes.byteLength);
        }
    };

    return stub as unknown as Response;
}

/**
 * Response-parsing contract of the API adapter's `request` method:
 * JSON is parsed, textual content types come back as strings, everything else
 * (or an explicit `responseType: 'base64'`) comes back as a base64 envelope,
 * and error responses always surface their body as text.
 */
describe('createApiAdapter response parsing', () => {
    const fetchMock = jest.fn();
    // Keep the pre-existing global so the stub does not outlive this suite.
    const originalFetch = global.fetch;

    beforeEach(() => {
        fetchMock.mockReset();
        global.fetch = fetchMock as unknown as typeof fetch;
    });

    afterAll(() => {
        global.fetch = originalFetch;
    });

    it('round-trips a binary (PNG) body as a base64 envelope without corrupting bytes', async () => {
        fetchMock.mockResolvedValue(makeResponse(PNG_BYTES, { contentType: 'image/png' }));

        const adapter = createApiAdapter(CONFIG);
        const result = await getRequestMethod(adapter).execute({ path: '/dA/abc123' });

        expect(isBinaryResponseEnvelope(result)).toBe(true);
        const envelope = result as {
            __dotcmsBinary: true;
            contentType: string;
            base64: string;
            byteLength: number;
        };
        expect(envelope.contentType).toBe('image/png');
        expect(envelope.byteLength).toBe(PNG_BYTES.byteLength);
        // The decoded bytes must be byte-exact to the source — the actual regression guard.
        expect(Buffer.from(envelope.base64, 'base64').equals(PNG_BYTES)).toBe(true);
    });

    it('parses JSON content as an object', async () => {
        fetchMock.mockResolvedValue(
            makeResponse(JSON.stringify({ hello: 'world' }), { contentType: 'application/json' })
        );

        const adapter = createApiAdapter(CONFIG);
        const result = await getRequestMethod(adapter).execute({ path: '/api/v1/x' });

        expect(result).toEqual({ hello: 'world' });
    });

    it('returns textual content types as strings', async () => {
        fetchMock.mockResolvedValue(
            makeResponse('<root/>', { contentType: 'application/xml; charset=utf-8' })
        );

        const adapter = createApiAdapter(CONFIG);
        const result = await getRequestMethod(adapter).execute({ path: '/api/x.xml' });

        expect(result).toBe('<root/>');
    });

    it('treats +json content types as textual strings', async () => {
        fetchMock.mockResolvedValue(
            makeResponse('{"a":1}', { contentType: 'application/vnd.api+json' })
        );

        const adapter = createApiAdapter(CONFIG);
        const result = await getRequestMethod(adapter).execute({ path: '/api/x' });

        expect(result).toBe('{"a":1}');
    });

    it('forces the binary path when responseType is "base64", even for JSON', async () => {
        fetchMock.mockResolvedValue(
            makeResponse(JSON.stringify({ hello: 'world' }), { contentType: 'application/json' })
        );

        const adapter = createApiAdapter(CONFIG);
        const result = await getRequestMethod(adapter).execute({
            path: '/api/v1/x',
            responseType: 'base64'
        });

        expect(isBinaryResponseEnvelope(result)).toBe(true);
    });

    it('reads the error body as text regardless of content-type', async () => {
        fetchMock.mockResolvedValue(
            makeResponse('<html>Not Found</html>', {
                contentType: 'text/html',
                ok: false,
                status: 404,
                statusText: 'Not Found'
            })
        );

        const adapter = createApiAdapter(CONFIG);
        await expect(getRequestMethod(adapter).execute({ path: '/dA/missing' })).rejects.toThrow(
            'HTTP 404 Not Found: <html>Not Found</html>'
        );
    });

    // A binary content-type would normally select the envelope path; on an
    // error status the body must still be reported as readable text.
    it('reads the error body as text even when the content-type is binary', async () => {
        fetchMock.mockResolvedValue(
            makeResponse('asset not found', {
                contentType: 'application/octet-stream',
                ok: false,
                status: 404,
                statusText: 'Not Found'
            })
        );

        const adapter = createApiAdapter(CONFIG);
        await expect(getRequestMethod(adapter).execute({ path: '/dA/missing' })).rejects.toThrow(
            'HTTP 404 Not Found: asset not found'
        );
    });

    it('rejects an oversized binary response via Content-Length before buffering', async () => {
        const oversized = String(26 * 1024 * 1024); // 26MB > 25MB cap
        const arrayBuffer = jest.fn();
        fetchMock.mockResolvedValue({
            ok: true,
            status: 200,
            statusText: 'OK',
            headers: {
                get: (name: string) =>
                    name.toLowerCase() === 'content-type'
                        ? 'application/octet-stream'
                        : name.toLowerCase() === 'content-length'
                          ? oversized
                          : null
            },
            arrayBuffer
        } as unknown as Response);

        const adapter = createApiAdapter(CONFIG);
        await expect(getRequestMethod(adapter).execute({ path: '/dA/huge' })).rejects.toThrow(
            'exceeds the'
        );
        // The body must never be buffered when Content-Length already exceeds the cap.
        expect(arrayBuffer).not.toHaveBeenCalled();
    });

    describe('isBinaryResponseEnvelope', () => {
        it('accepts a fully-formed envelope', () => {
            expect(
                isBinaryResponseEnvelope({
                    __dotcmsBinary: true,
                    contentType: 'image/png',
                    base64: 'AA==',
                    byteLength: 1
                })
            ).toBe(true);
        });

        it('rejects an envelope missing contentType or byteLength', () => {
            expect(isBinaryResponseEnvelope({ __dotcmsBinary: true, base64: 'AA==' })).toBe(false);
            expect(
                isBinaryResponseEnvelope({
                    __dotcmsBinary: true,
                    base64: 'AA==',
                    contentType: 'image/png'
                })
            ).toBe(false);
        });

        it('rejects non-envelope values', () => {
            expect(isBinaryResponseEnvelope(null)).toBe(false);
            expect(isBinaryResponseEnvelope('string')).toBe(false);
            expect(isBinaryResponseEnvelope({ hello: 'world' })).toBe(false);
        });
    });
});
118 changes: 106 additions & 12 deletions core-web/libs/agentic-tools/src/lib/http-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ interface RequestOptions {
body?: unknown;
formData?: Record<string, FormDataFieldValue>;
headers?: Record<string, string>;
// How to decode the response body. Defaults to content-type auto-detection:
// JSON content types are parsed; textual types come back as strings; everything
// else (images, fonts, etc.) comes back as a base64 binary envelope so the bytes
// survive the JSON.stringify boundary in the consuming sandbox. Set 'base64' to
// force the binary path regardless of the declared content-type.
responseType?: 'auto' | 'base64';
}

function isFileDescriptor(value: unknown): value is FileFieldDescriptor {
Expand All @@ -35,6 +41,89 @@ const MAX_REMOTE_FILE_BYTES = 25 * 1024 * 1024; // 25 MB
// Timeout (ms) for the remote fetch, so a slow/hanging URL cannot stall the host.
const REMOTE_FILE_FETCH_TIMEOUT_MS = 15000;

// Max size (bytes) for a binary response body returned as a base64 envelope.
// base64 inflates the payload ~33% and the whole thing flows through
// JSON.stringify in the consuming sandbox, so large assets can blow up memory
// and model context — cap it like the upload side already does.
const MAX_BINARY_RESPONSE_BYTES = 25 * 1024 * 1024; // 25 MB

/**
 * Tagged envelope returned for non-textual response bodies. The raw bytes are
 * base64-encoded so they survive the `JSON.stringify` serialization boundary in
 * `execute.ts` intact — `response.text()` would corrupt any non-UTF-8 byte into
 * the U+FFFD replacement char. Consumers detect `__dotcmsBinary` and decode.
 */
export interface BinaryResponseEnvelope {
// Discriminator tag: always the literal `true`, so the envelope can be told
// apart from ordinary parsed JSON objects.
__dotcmsBinary: true;
// Content type the body was declared with, passed through unchanged by
// `readBinaryResponse`.
contentType: string;
// The response body bytes, base64-encoded.
base64: string;
// Size in bytes of the raw body, not the length of the `base64` string.
byteLength: number;
}

/**
 * Runtime type guard for {@link BinaryResponseEnvelope}.
 *
 * A value qualifies when it is a non-null object tagged with
 * `__dotcmsBinary === true` whose `base64` and `contentType` are strings and
 * whose `byteLength` is a number. Once narrowed, callers can recover the bytes
 * with `Buffer.from(envelope.base64, 'base64')`.
 *
 * @param value - Any value, typically the result of an adapter request.
 * @returns `true` when `value` has the envelope shape.
 */
export function isBinaryResponseEnvelope(value: unknown): value is BinaryResponseEnvelope {
    if (value === null || typeof value !== 'object') {
        return false;
    }

    const candidate = value as Record<string, unknown>;
    if (candidate.__dotcmsBinary !== true) {
        return false;
    }

    // Field name paired with the `typeof` result it must produce.
    const requiredFields: ReadonlyArray<readonly [string, string]> = [
        ['base64', 'string'],
        ['contentType', 'string'],
        ['byteLength', 'number']
    ];
    return requiredFields.every(([field, expected]) => typeof candidate[field] === expected);
}

/**
 * Decide whether a content-type should be decoded as text. Everything that is
 * not JSON (handled separately by the caller) and not in this textual set is
 * treated as binary and returned as a base64 envelope.
 *
 * Only the media type itself is inspected. Parameters after the first `;`
 * (`charset=…`, `name="…"`, etc.) are ignored, so a parameter value that
 * contains `+json`, `+xml` or `application/xml` cannot make a binary body look
 * textual.
 *
 * @param contentType - Raw `Content-Type` header value; may be empty.
 * @returns `true` for `text/*`, XML, JavaScript, form-urlencoded and any
 * `+json` / `+xml` media type; `false` otherwise, including the empty string.
 */
function isTextualContentType(contentType: string): boolean {
    const paramStart = contentType.indexOf(';');
    const rawMediaType = paramStart === -1 ? contentType : contentType.slice(0, paramStart);
    const mediaType = rawMediaType.trim().toLowerCase();

    return (
        mediaType.startsWith('text/') ||
        mediaType.includes('application/xml') ||
        mediaType.includes('application/javascript') ||
        mediaType.includes('application/x-www-form-urlencoded') ||
        mediaType.includes('+json') ||
        mediaType.includes('+xml')
    );
}

/**
 * Read a response body as a base64 binary envelope, enforcing the size cap.
 *
 * The cap is checked twice: first against the declared `Content-Length`, so an
 * oversized body is never buffered, and again against the bytes actually read,
 * because the header can be absent or wrong.
 *
 * @param response - Fetch response whose body has not been consumed yet.
 * @param contentType - Content type to record on the envelope.
 * @returns Envelope holding the base64-encoded body and its raw byte length.
 * @throws Error when the declared or actual body size exceeds
 * `MAX_BINARY_RESPONSE_BYTES`.
 */
async function readBinaryResponse(
    response: Response,
    contentType: string
): Promise<BinaryResponseEnvelope> {
    // A missing header yields Number(null) === 0, which passes this check and
    // defers to the post-read check below.
    const declaredLength = Number(response.headers.get('content-length'));
    if (Number.isFinite(declaredLength) && declaredLength > MAX_BINARY_RESPONSE_BYTES) {
        // The body is being abandoned: cancel it so the underlying connection
        // is released now instead of staying open until garbage collection.
        // A failure to cancel must not mask the size error.
        await response.body?.cancel().catch(() => undefined);
        throw new Error(
            `Binary response (${declaredLength} bytes) exceeds the ${MAX_BINARY_RESPONSE_BYTES}-byte limit`
        );
    }

    const buffer = await response.arrayBuffer();
    // Authoritative backstop for an absent or understated Content-Length.
    if (buffer.byteLength > MAX_BINARY_RESPONSE_BYTES) {
        throw new Error(
            `Binary response (${buffer.byteLength} bytes) exceeds the ${MAX_BINARY_RESPONSE_BYTES}-byte limit`
        );
    }

    return {
        __dotcmsBinary: true,
        contentType,
        base64: Buffer.from(buffer).toString('base64'),
        byteLength: buffer.byteLength
    };
}

/**
* Validates a user-supplied file URL before fetching it, to mitigate SSRF.
* Sandbox code can put any string in `desc.url`, and the fetch runs on the
Expand Down Expand Up @@ -216,23 +305,28 @@ export function createApiAdapter(config: ApiAdapterConfig): Adapter {

const response = await fetch(url.toString(), fetchOptions);

// Parse response
// Parse response.
const contentType = response.headers.get('content-type') || '';
let data: unknown;

if (contentType.includes('application/json')) {
data = await response.json();
} else {
data = await response.text();
}

// On error, always read the body as text regardless of the declared
// content-type — dotCMS errors come back as HTML/text and we want a
// readable message, not a base64 envelope of the error page.
if (!response.ok) {
throw new Error(
`HTTP ${response.status} ${response.statusText}: ${typeof data === 'string' ? data : JSON.stringify(data)}`
);
const errorBody = await response.text();
throw new Error(`HTTP ${response.status} ${response.statusText}: ${errorBody}`);
}

return data;
const forceBinary = options.responseType === 'base64';

if (!forceBinary && contentType.includes('application/json')) {
return await response.json();
}
Comment thread
fmontes marked this conversation as resolved.
if (!forceBinary && isTextualContentType(contentType)) {
return await response.text();
}
// Non-JSON, non-textual (or explicitly requested): return a base64
// envelope so the raw bytes survive JSON.stringify intact.
return await readBinaryResponse(response, contentType);
}
};

Expand Down
3 changes: 2 additions & 1 deletion core-web/libs/agentic-tools/tsconfig.lib.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"types": ["node"],
"resolveJsonModule": true
},
"include": ["src/**/*.ts", "src/**/*.json"]
"include": ["src/**/*.ts", "src/**/*.json"],
"exclude": ["src/**/*.spec.ts", "src/**/*.test.ts"]
}
Loading