diff --git a/core-web/apps/mcp-server/src/tools/execute.ts b/core-web/apps/mcp-server/src/tools/execute.ts
index fe53968b4802..28d42a0ae090 100644
--- a/core-web/apps/mcp-server/src/tools/execute.ts
+++ b/core-web/apps/mcp-server/src/tools/execute.ts
@@ -43,6 +43,11 @@ Tips:
- Use \`pick(arr, fields)\` to return only the fields you need — responses can be very large
- For file uploads use \`formData\` with \`{ name, type, data }\` (base64) or \`{ name, type, url }\` (remote URL)
+Binary responses (file assets — images, fonts, PDFs, etc.):
+- Endpoints that return non-text bodies (e.g. GET \`/api/v2/assets/{identifier}\` and \`/dA/{id}\`, content-type \`application/octet-stream\` or \`image/*\`) come back as an envelope: \`{ __dotcmsBinary: true, contentType, base64, byteLength }\`.
+- The \`base64\` field IS the raw file bytes — base64-decode it to recover the exact file. Do NOT treat it as text; the bytes are intact (not UTF-8-mangled).
+- JSON and textual responses (\`text/*\`, xml, js, \`+json\`/\`+xml\`) are returned as parsed objects / strings as before — only binary bodies use the envelope.
+
Block Editor (Story Block) fields:
- A Story Block field stores a string. When creating or updating content via a fire endpoint, send the field value as an **HTML or Markdown string** — do NOT hand-author the ProseMirror/JSON document. dotCMS stores it as-is and converts it to the Block Editor structure when the contentlet is opened in the editor.
- Example: \`{ "contentType": "Blog", "title": "My Post", "body": "<h1>Intro</h1><p>Hello world.</p>" }\` — where \`body\` is the Story Block field.
diff --git a/core-web/libs/agentic-tools/src/index.ts b/core-web/libs/agentic-tools/src/index.ts
index 2232fa04933c..4110219556e5 100644
--- a/core-web/libs/agentic-tools/src/index.ts
+++ b/core-web/libs/agentic-tools/src/index.ts
@@ -1,8 +1,8 @@
export { Executor, createExecutor } from './lib/executor';
export type { ExecutorOptions } from './lib/executor';
-export { createApiAdapter } from './lib/http-client';
-export type { ApiAdapterConfig } from './lib/http-client';
+export { createApiAdapter, isBinaryResponseEnvelope } from './lib/http-client';
+export type { ApiAdapterConfig, BinaryResponseEnvelope } from './lib/http-client';
export { createSandbox } from './lib/sandbox';
export type { ISandbox, SandboxFactory } from './lib/sandbox/interface';
diff --git a/core-web/libs/agentic-tools/src/lib/http-client.spec.ts b/core-web/libs/agentic-tools/src/lib/http-client.spec.ts
new file mode 100644
index 000000000000..d655d578c537
--- /dev/null
+++ b/core-web/libs/agentic-tools/src/lib/http-client.spec.ts
@@ -0,0 +1,205 @@
+import { createApiAdapter, isBinaryResponseEnvelope } from './http-client';
+
+import type { Adapter, AdapterMethod } from './types';
+
+/**
+ * A real 1x1 PNG, base64-encoded. Its first byte is 0x89 (the start of the PNG
+ * signature), which is not valid UTF-8 — the exact kind of byte that
+ * `response.text()` corrupts into U+FFFD. Regression fixture for that bug.
+ */
+const PNG_BASE64 =
+ 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==';
+const PNG_BYTES = Buffer.from(PNG_BASE64, 'base64'); // decoded bytes the envelope must reproduce exactly
+// Adapter configuration shared by every test; `fetch` is mocked, so no request leaves the process.
+const CONFIG = { dotcmsUrl: 'https://example.dotcms.com', authToken: 'test-token' };
+
+function getRequestMethod(adapter: Adapter): AdapterMethod {
+    const requestMethod = adapter.methods.get('request');
+    if (requestMethod) {
+        return requestMethod;
+    }
+    throw new Error('request method not registered'); // nothing to exercise without it
+}
+
+/** Build a Response-like stub backed by a fixed body buffer (string bodies are UTF-8 encoded). */
+function makeResponse(
+    body: Buffer | string,
+    {
+        contentType,
+        ok = true,
+        status = 200,
+        statusText = 'OK',
+        contentLength
+    }: {
+        contentType: string;
+        ok?: boolean;
+        status?: number;
+        statusText?: string;
+        // Override the Content-Length header independently of the actual body —
+        // lets us simulate a server that advertises an oversized response.
+        contentLength?: string;
+    }
+): Response {
+    const buffer = typeof body === 'string' ? Buffer.from(body, 'utf-8') : body;
+    const headerValues: Record<string, string> = {
+        'content-type': contentType,
+        'content-length': contentLength ?? String(buffer.byteLength)
+    };
+    return {
+        ok,
+        status,
+        statusText,
+        headers: { get: (name: string) => headerValues[name.toLowerCase()] ?? null },
+        json: async () => JSON.parse(buffer.toString('utf-8')),
+        text: async () => buffer.toString('utf-8'),
+        arrayBuffer: async () =>
+            buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
+    } as unknown as Response;
+}
+
+describe('createApiAdapter response parsing', () => {
+    const fetchMock = jest.fn();
+
+    beforeEach(() => {
+        fetchMock.mockReset();
+        global.fetch = fetchMock as unknown as typeof fetch;
+    });
+
+    it('round-trips a binary (PNG) body as a base64 envelope without corrupting bytes', async () => {
+        fetchMock.mockResolvedValue(makeResponse(PNG_BYTES, { contentType: 'image/png' }));
+
+        const adapter = createApiAdapter(CONFIG);
+        const result = await getRequestMethod(adapter).execute({ path: '/dA/abc123' });
+
+        expect(isBinaryResponseEnvelope(result)).toBe(true);
+        const envelope = result as {
+            __dotcmsBinary: true;
+            contentType: string;
+            base64: string;
+            byteLength: number;
+        };
+        expect(envelope.contentType).toBe('image/png');
+        expect(envelope.byteLength).toBe(PNG_BYTES.byteLength);
+        // The decoded bytes must be byte-exact to the source — the actual regression guard.
+        expect(Buffer.from(envelope.base64, 'base64').equals(PNG_BYTES)).toBe(true);
+    });
+
+    it('parses JSON content as an object', async () => {
+        fetchMock.mockResolvedValue(
+            makeResponse(JSON.stringify({ hello: 'world' }), { contentType: 'application/json' })
+        );
+
+        const adapter = createApiAdapter(CONFIG);
+        const result = await getRequestMethod(adapter).execute({ path: '/api/v1/x' });
+
+        expect(result).toEqual({ hello: 'world' });
+    });
+
+    it('returns textual content types as strings', async () => {
+        fetchMock.mockResolvedValue(
+            makeResponse('<root/>', { contentType: 'application/xml; charset=utf-8' })
+        );
+
+        const adapter = createApiAdapter(CONFIG);
+        const result = await getRequestMethod(adapter).execute({ path: '/api/x.xml' });
+
+        expect(result).toBe('<root/>');
+    });
+
+    it('treats +json content types as textual strings', async () => {
+        fetchMock.mockResolvedValue(
+            makeResponse('{"a":1}', { contentType: 'application/vnd.api+json' })
+        );
+
+        const adapter = createApiAdapter(CONFIG);
+        const result = await getRequestMethod(adapter).execute({ path: '/api/x' });
+
+        expect(result).toBe('{"a":1}');
+    });
+
+    it('forces the binary path when responseType is "base64", even for JSON', async () => {
+        fetchMock.mockResolvedValue(
+            makeResponse(JSON.stringify({ hello: 'world' }), { contentType: 'application/json' })
+        );
+
+        const adapter = createApiAdapter(CONFIG);
+        const result = await getRequestMethod(adapter).execute({
+            path: '/api/v1/x',
+            responseType: 'base64'
+        });
+
+        expect(isBinaryResponseEnvelope(result)).toBe(true);
+    });
+
+    it('reads the error body as text regardless of content-type', async () => {
+        fetchMock.mockResolvedValue(
+            makeResponse('Not Found', {
+                contentType: 'text/html',
+                ok: false,
+                status: 404,
+                statusText: 'Not Found'
+            })
+        );
+
+        const adapter = createApiAdapter(CONFIG);
+        await expect(getRequestMethod(adapter).execute({ path: '/dA/missing' })).rejects.toThrow(
+            'HTTP 404 Not Found: Not Found'
+        );
+    });
+
+    it('rejects an oversized binary response via Content-Length before buffering', async () => {
+        const oversized = String(26 * 1024 * 1024); // 26MB > 25MB cap
+        const arrayBuffer = jest.fn();
+        fetchMock.mockResolvedValue({
+            ok: true,
+            status: 200,
+            statusText: 'OK',
+            headers: {
+                get: (name: string) =>
+                    name.toLowerCase() === 'content-type'
+                        ? 'application/octet-stream'
+                        : name.toLowerCase() === 'content-length'
+                          ? oversized
+                          : null
+            },
+            arrayBuffer
+        } as unknown as Response);
+
+        const adapter = createApiAdapter(CONFIG);
+        await expect(getRequestMethod(adapter).execute({ path: '/dA/huge' })).rejects.toThrow(
+            'exceeds the'
+        );
+        // The body must never be buffered when Content-Length already exceeds the cap.
+        expect(arrayBuffer).not.toHaveBeenCalled();
+    });
+
+    describe('isBinaryResponseEnvelope', () => {
+        it('accepts a fully-formed envelope', () => {
+            expect(
+                isBinaryResponseEnvelope({
+                    __dotcmsBinary: true,
+                    contentType: 'image/png',
+                    base64: 'AA==',
+                    byteLength: 1
+                })
+            ).toBe(true);
+        });
+
+        it('rejects an envelope missing contentType or byteLength', () => {
+            expect(isBinaryResponseEnvelope({ __dotcmsBinary: true, base64: 'AA==' })).toBe(false);
+            expect(
+                isBinaryResponseEnvelope({
+                    __dotcmsBinary: true,
+                    base64: 'AA==',
+                    contentType: 'image/png'
+                })
+            ).toBe(false);
+        });
+
+        it('rejects non-envelope values', () => {
+            expect(isBinaryResponseEnvelope(null)).toBe(false);
+            expect(isBinaryResponseEnvelope('string')).toBe(false);
+            expect(isBinaryResponseEnvelope({ hello: 'world' })).toBe(false);
+        });
+    });
+});
diff --git a/core-web/libs/agentic-tools/src/lib/http-client.ts b/core-web/libs/agentic-tools/src/lib/http-client.ts
index e0a532740d54..0366eab8b612 100644
--- a/core-web/libs/agentic-tools/src/lib/http-client.ts
+++ b/core-web/libs/agentic-tools/src/lib/http-client.ts
@@ -16,6 +16,12 @@ interface RequestOptions {
body?: unknown;
formData?: Record;
headers?: Record;
+ // How to decode the response body. Defaults to content-type auto-detection:
+ // JSON content types are parsed; textual types come back as strings; everything
+ // else (images, fonts, etc.) comes back as a base64 binary envelope so the bytes
+ // survive the JSON.stringify boundary in the consuming sandbox. Set 'base64' to
+ // force the binary path regardless of the declared content-type.
+ responseType?: 'auto' | 'base64';
}
function isFileDescriptor(value: unknown): value is FileFieldDescriptor {
@@ -35,6 +41,89 @@ const MAX_REMOTE_FILE_BYTES = 25 * 1024 * 1024; // 25 MB
// Timeout (ms) for the remote fetch, so a slow/hanging URL cannot stall the host.
const REMOTE_FILE_FETCH_TIMEOUT_MS = 15000;
+// Max size (bytes) of a response body that may be returned as a base64 envelope;
+// it is compared against the raw body size, before encoding. base64 inflates the
+// payload ~33% and the result flows through JSON.stringify in the consuming
+// sandbox, so large assets can blow up memory and model context. Same 25 MB as MAX_REMOTE_FILE_BYTES.
+const MAX_BINARY_RESPONSE_BYTES = 25 * 1024 * 1024; // 25 MB
+
+/**
+ * Tagged envelope returned for non-textual response bodies. The raw bytes are
+ * base64-encoded so they survive the `JSON.stringify` serialization boundary in
+ * `execute.ts` intact — `response.text()` would corrupt any non-UTF-8 byte into
+ * the U+FFFD replacement char. Consumers detect `__dotcmsBinary` and decode.
+ */
+export interface BinaryResponseEnvelope {
+ __dotcmsBinary: true; // discriminator tag; always the literal `true`
+ contentType: string; // content-type value the response was read with
+ base64: string; // the response body bytes, base64-encoded
+ byteLength: number; // size of the raw body in bytes (not the base64 string length)
+}
+
+/**
+ * Type guard for the binary response envelope: true only when the tag and all
+ * three payload fields have the expected types. Decode a match with `Buffer.from(envelope.base64, 'base64')`.
+ */
+export function isBinaryResponseEnvelope(value: unknown): value is BinaryResponseEnvelope {
+    if (typeof value !== 'object' || value === null) {
+        return false;
+    }
+    const obj = value as Record<string, unknown>;
+    return (
+        obj.__dotcmsBinary === true &&
+        typeof obj.base64 === 'string' &&
+        typeof obj.contentType === 'string' &&
+        typeof obj.byteLength === 'number'
+    );
+}
+
+/**
+ * Report whether a content-type should be decoded as text. The match is
+ * case-insensitive and substring-based, so parameters such as a charset do not
+ * matter. JSON is handled by the caller; anything else is treated as binary.
+ */
+function isTextualContentType(contentType: string): boolean {
+    const normalized = contentType.toLowerCase();
+    if (normalized.startsWith('text/')) {
+        return true;
+    }
+    const textualMarkers = [
+        'application/xml', 'application/javascript',
+        'application/x-www-form-urlencoded', '+json', '+xml'
+    ];
+    return textualMarkers.some((marker) => normalized.includes(marker));
+}
+
+/** Read a response body as a base64 binary envelope; throws when it exceeds the size cap. */
+async function readBinaryResponse(
+    response: Response,
+    contentType: string
+): Promise<BinaryResponseEnvelope> {
+    // Reject early via Content-Length so we never buffer an oversized body into
+    // memory. The header can be absent or lie, so the post-read check below stays
+    // as the authoritative backstop.
+    const declaredLength = Number(response.headers.get('content-length'));
+    if (Number.isFinite(declaredLength) && declaredLength > MAX_BINARY_RESPONSE_BYTES) {
+        // Discard the unread body so its connection is released, not left dangling.
+        void response.body?.cancel().catch(() => undefined);
+        throw new Error(
+            `Binary response (${declaredLength} bytes) exceeds the ${MAX_BINARY_RESPONSE_BYTES}-byte limit`
+        );
+    }
+    const buffer = await response.arrayBuffer();
+    if (buffer.byteLength > MAX_BINARY_RESPONSE_BYTES) {
+        throw new Error(
+            `Binary response (${buffer.byteLength} bytes) exceeds the ${MAX_BINARY_RESPONSE_BYTES}-byte limit`
+        );
+    }
+    return {
+        __dotcmsBinary: true,
+        contentType,
+        base64: Buffer.from(buffer).toString('base64'),
+        byteLength: buffer.byteLength
+    };
+}
+
/**
* Validates a user-supplied file URL before fetching it, to mitigate SSRF.
* Sandbox code can put any string in `desc.url`, and the fetch runs on the
@@ -216,23 +305,28 @@ export function createApiAdapter(config: ApiAdapterConfig): Adapter {
const response = await fetch(url.toString(), fetchOptions);
- // Parse response
+ // Parse response.
const contentType = response.headers.get('content-type') || '';
- let data: unknown;
-
- if (contentType.includes('application/json')) {
- data = await response.json();
- } else {
- data = await response.text();
- }
+ // On error, always read the body as text regardless of the declared
+ // content-type — dotCMS errors come back as HTML/text and we want a
+ // readable message, not a base64 envelope of the error page.
if (!response.ok) {
- throw new Error(
- `HTTP ${response.status} ${response.statusText}: ${typeof data === 'string' ? data : JSON.stringify(data)}`
- );
+ const errorBody = await response.text();
+ throw new Error(`HTTP ${response.status} ${response.statusText}: ${errorBody}`);
}
- return data;
+ const forceBinary = options.responseType === 'base64';
+
+ if (!forceBinary && contentType.includes('application/json')) {
+ return await response.json();
+ }
+ if (!forceBinary && isTextualContentType(contentType)) {
+ return await response.text();
+ }
+ // Non-JSON, non-textual (or explicitly requested): return a base64
+ // envelope so the raw bytes survive JSON.stringify intact.
+ return await readBinaryResponse(response, contentType);
}
};
diff --git a/core-web/libs/agentic-tools/tsconfig.lib.json b/core-web/libs/agentic-tools/tsconfig.lib.json
index 9774c58377f8..bd3cb7eff516 100644
--- a/core-web/libs/agentic-tools/tsconfig.lib.json
+++ b/core-web/libs/agentic-tools/tsconfig.lib.json
@@ -6,5 +6,6 @@
"types": ["node"],
"resolveJsonModule": true
},
- "include": ["src/**/*.ts", "src/**/*.json"]
+ "include": ["src/**/*.ts", "src/**/*.json"],
+ "exclude": ["src/**/*.spec.ts", "src/**/*.test.ts"]
}