From b8f292d4256d87badb2d45d339e1a7bd2954f11e Mon Sep 17 00:00:00 2001 From: Freddy Montes Date: Fri, 19 Jun 2026 07:25:05 -0600 Subject: [PATCH 1/5] fix(agentic-tools): return binary HTTP responses as base64 envelope response.text() UTF-8-decoded every non-JSON body, corrupting binary file assets (images, fonts, ico) into U+FFFD. Parse JSON via .json(), textual content types as strings, and everything else as a base64 { __dotcmsBinary, contentType, base64, byteLength } envelope so bytes survive JSON.stringify across the sandbox boundary. Error responses are read as text so server messages are preserved, with a 25MB cap on the binary path. Adds a PNG round-trip regression test. Fixes #36241 Co-Authored-By: Claude Opus 4.8 (1M context) --- core-web/libs/agentic-tools/src/index.ts | 4 +- .../agentic-tools/src/lib/http-client.spec.ts | 137 ++++++++++++++++++ .../libs/agentic-tools/src/lib/http-client.ts | 103 +++++++++++-- 3 files changed, 232 insertions(+), 12 deletions(-) create mode 100644 core-web/libs/agentic-tools/src/lib/http-client.spec.ts diff --git a/core-web/libs/agentic-tools/src/index.ts b/core-web/libs/agentic-tools/src/index.ts index 2232fa04933c..4110219556e5 100644 --- a/core-web/libs/agentic-tools/src/index.ts +++ b/core-web/libs/agentic-tools/src/index.ts @@ -1,8 +1,8 @@ export { Executor, createExecutor } from './lib/executor'; export type { ExecutorOptions } from './lib/executor'; -export { createApiAdapter } from './lib/http-client'; -export type { ApiAdapterConfig } from './lib/http-client'; +export { createApiAdapter, isBinaryResponseEnvelope } from './lib/http-client'; +export type { ApiAdapterConfig, BinaryResponseEnvelope } from './lib/http-client'; export { createSandbox } from './lib/sandbox'; export type { ISandbox, SandboxFactory } from './lib/sandbox/interface'; diff --git a/core-web/libs/agentic-tools/src/lib/http-client.spec.ts b/core-web/libs/agentic-tools/src/lib/http-client.spec.ts new file mode 100644 index 
000000000000..b71cb21af5d8 --- /dev/null +++ b/core-web/libs/agentic-tools/src/lib/http-client.spec.ts @@ -0,0 +1,137 @@ +import { createApiAdapter, isBinaryResponseEnvelope } from './http-client'; + +import type { Adapter, AdapterMethod } from './types'; + +/** + * A real 1x1 red PNG. Its first byte is 0x89, which is not valid UTF-8 — the + * exact kind of byte that `response.text()` corrupts into U+FFFD. This is the + * regression fixture for the binary-response corruption bug. + */ +const PNG_BASE64 = + 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=='; +const PNG_BYTES = Buffer.from(PNG_BASE64, 'base64'); + +const CONFIG = { dotcmsUrl: 'https://example.dotcms.com', authToken: 'test-token' }; + +function getRequestMethod(adapter: Adapter): AdapterMethod { + const method = adapter.methods.get('request'); + if (!method) { + throw new Error('request method not registered'); + } + return method; +} + +/** Build a Response-like stub backed by a fixed body buffer. */ +function makeResponse( + body: Buffer | string, + { contentType, ok = true, status = 200, statusText = 'OK' }: { + contentType: string; + ok?: boolean; + status?: number; + statusText?: string; + } +): Response { + const buffer = + typeof body === 'string' ? Buffer.from(body, 'utf-8') : body; + return { + ok, + status, + statusText, + headers: { get: (name: string) => (name.toLowerCase() === 'content-type' ? 
contentType : null) }, + json: async () => JSON.parse(buffer.toString('utf-8')), + text: async () => buffer.toString('utf-8'), + arrayBuffer: async () => + buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength) + } as unknown as Response; +} + +describe('createApiAdapter response parsing', () => { + const fetchMock = jest.fn(); + + beforeEach(() => { + fetchMock.mockReset(); + global.fetch = fetchMock as unknown as typeof fetch; + }); + + it('round-trips a binary (PNG) body as a base64 envelope without corrupting bytes', async () => { + fetchMock.mockResolvedValue(makeResponse(PNG_BYTES, { contentType: 'image/png' })); + + const adapter = createApiAdapter(CONFIG); + const result = await getRequestMethod(adapter).execute({ path: '/dA/abc123' }); + + expect(isBinaryResponseEnvelope(result)).toBe(true); + const envelope = result as { + __dotcmsBinary: true; + contentType: string; + base64: string; + byteLength: number; + }; + expect(envelope.contentType).toBe('image/png'); + expect(envelope.byteLength).toBe(PNG_BYTES.byteLength); + // The decoded bytes must be byte-exact to the source — the actual regression guard. 
+ expect(Buffer.from(envelope.base64, 'base64').equals(PNG_BYTES)).toBe(true); + }); + + it('parses JSON content as an object', async () => { + fetchMock.mockResolvedValue( + makeResponse(JSON.stringify({ hello: 'world' }), { contentType: 'application/json' }) + ); + + const adapter = createApiAdapter(CONFIG); + const result = await getRequestMethod(adapter).execute({ path: '/api/v1/x' }); + + expect(result).toEqual({ hello: 'world' }); + }); + + it('returns textual content types as strings', async () => { + fetchMock.mockResolvedValue( + makeResponse('', { contentType: 'application/xml; charset=utf-8' }) + ); + + const adapter = createApiAdapter(CONFIG); + const result = await getRequestMethod(adapter).execute({ path: '/api/x.xml' }); + + expect(result).toBe(''); + }); + + it('treats +json content types as textual strings', async () => { + fetchMock.mockResolvedValue( + makeResponse('{"a":1}', { contentType: 'application/vnd.api+json' }) + ); + + const adapter = createApiAdapter(CONFIG); + const result = await getRequestMethod(adapter).execute({ path: '/api/x' }); + + expect(result).toBe('{"a":1}'); + }); + + it('forces the binary path when responseType is "base64", even for JSON', async () => { + fetchMock.mockResolvedValue( + makeResponse(JSON.stringify({ hello: 'world' }), { contentType: 'application/json' }) + ); + + const adapter = createApiAdapter(CONFIG); + const result = await getRequestMethod(adapter).execute({ + path: '/api/v1/x', + responseType: 'base64' + }); + + expect(isBinaryResponseEnvelope(result)).toBe(true); + }); + + it('reads the error body as text regardless of content-type', async () => { + fetchMock.mockResolvedValue( + makeResponse('Not Found', { + contentType: 'text/html', + ok: false, + status: 404, + statusText: 'Not Found' + }) + ); + + const adapter = createApiAdapter(CONFIG); + await expect(getRequestMethod(adapter).execute({ path: '/dA/missing' })).rejects.toThrow( + 'HTTP 404 Not Found: Not Found' + ); + }); +}); diff --git 
a/core-web/libs/agentic-tools/src/lib/http-client.ts b/core-web/libs/agentic-tools/src/lib/http-client.ts index e0a532740d54..ad5b2e7cce62 100644 --- a/core-web/libs/agentic-tools/src/lib/http-client.ts +++ b/core-web/libs/agentic-tools/src/lib/http-client.ts @@ -16,6 +16,12 @@ interface RequestOptions { body?: unknown; formData?: Record; headers?: Record; + // How to decode the response body. Defaults to content-type auto-detection: + // JSON content types are parsed; textual types come back as strings; everything + // else (images, fonts, etc.) comes back as a base64 binary envelope so the bytes + // survive the JSON.stringify boundary in the consuming sandbox. Set 'base64' to + // force the binary path regardless of the declared content-type. + responseType?: 'auto' | 'base64'; } function isFileDescriptor(value: unknown): value is FileFieldDescriptor { @@ -35,6 +41,76 @@ const MAX_REMOTE_FILE_BYTES = 25 * 1024 * 1024; // 25 MB // Timeout (ms) for the remote fetch, so a slow/hanging URL cannot stall the host. const REMOTE_FILE_FETCH_TIMEOUT_MS = 15000; +// Max size (bytes) for a binary response body returned as a base64 envelope. +// base64 inflates the payload ~33% and the whole thing flows through +// JSON.stringify in the consuming sandbox, so large assets can blow up memory +// and model context — cap it like the upload side already does. +const MAX_BINARY_RESPONSE_BYTES = 25 * 1024 * 1024; // 25 MB + +/** + * Tagged envelope returned for non-textual response bodies. The raw bytes are + * base64-encoded so they survive the `JSON.stringify` serialization boundary in + * `execute.ts` intact — `response.text()` would corrupt any non-UTF-8 byte into + * the U+FFFD replacement char. Consumers detect `__dotcmsBinary` and decode. + */ +export interface BinaryResponseEnvelope { + __dotcmsBinary: true; + contentType: string; + base64: string; + byteLength: number; +} + +/** + * Type guard for the binary response envelope. 
Consumers can use this to detect + * a binary body and `Buffer.from(envelope.base64, 'base64')` to recover the bytes. + */ +export function isBinaryResponseEnvelope(value: unknown): value is BinaryResponseEnvelope { + return ( + typeof value === 'object' && + value !== null && + (value as Record).__dotcmsBinary === true && + typeof (value as Record).base64 === 'string' + ); +} + +/** + * Decide whether a content-type should be decoded as text. Everything that is + * not JSON (handled separately) and not in this textual set is treated as + * binary and returned as a base64 envelope. + */ +function isTextualContentType(contentType: string): boolean { + const ct = contentType.toLowerCase(); + return ( + ct.startsWith('text/') || + ct.includes('application/xml') || + ct.includes('application/javascript') || + ct.includes('application/x-www-form-urlencoded') || + ct.includes('+json') || + ct.includes('+xml') + ); +} + +/** + * Read a response body as a base64 binary envelope, enforcing the size cap. + */ +async function readBinaryResponse( + response: Response, + contentType: string +): Promise { + const buffer = await response.arrayBuffer(); + if (buffer.byteLength > MAX_BINARY_RESPONSE_BYTES) { + throw new Error( + `Binary response (${buffer.byteLength} bytes) exceeds the ${MAX_BINARY_RESPONSE_BYTES}-byte limit` + ); + } + return { + __dotcmsBinary: true, + contentType, + base64: Buffer.from(buffer).toString('base64'), + byteLength: buffer.byteLength + }; +} + /** * Validates a user-supplied file URL before fetching it, to mitigate SSRF. * Sandbox code can put any string in `desc.url`, and the fetch runs on the @@ -216,23 +292,30 @@ export function createApiAdapter(config: ApiAdapterConfig): Adapter { const response = await fetch(url.toString(), fetchOptions); - // Parse response + // Parse response. 
const contentType = response.headers.get('content-type') || ''; - let data: unknown; - - if (contentType.includes('application/json')) { - data = await response.json(); - } else { - data = await response.text(); - } + // On error, always read the body as text regardless of the declared + // content-type — dotCMS errors come back as HTML/text and we want a + // readable message, not a base64 envelope of the error page. if (!response.ok) { + const errorBody = await response.text(); throw new Error( - `HTTP ${response.status} ${response.statusText}: ${typeof data === 'string' ? data : JSON.stringify(data)}` + `HTTP ${response.status} ${response.statusText}: ${errorBody}` ); } - return data; + const forceBinary = options.responseType === 'base64'; + + if (!forceBinary && contentType.includes('application/json')) { + return await response.json(); + } + if (!forceBinary && isTextualContentType(contentType)) { + return await response.text(); + } + // Non-JSON, non-textual (or explicitly requested): return a base64 + // envelope so the raw bytes survive JSON.stringify intact. + return await readBinaryResponse(response, contentType); } }; From ea4b209ca16c5cff3c73d1a1c2086f7b427fef93 Mon Sep 17 00:00:00 2001 From: Freddy Montes Date: Fri, 19 Jun 2026 07:29:49 -0600 Subject: [PATCH 2/5] docs(mcp-server): document binary-response envelope in execute tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tell the model that binary file-asset endpoints (e.g. /api/v2/assets, /dA) return a { __dotcmsBinary, contentType, base64, byteLength } envelope whose base64 is the raw bytes to decode — not text. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- core-web/apps/mcp-server/src/tools/execute.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core-web/apps/mcp-server/src/tools/execute.ts b/core-web/apps/mcp-server/src/tools/execute.ts index fe53968b4802..28d42a0ae090 100644 --- a/core-web/apps/mcp-server/src/tools/execute.ts +++ b/core-web/apps/mcp-server/src/tools/execute.ts @@ -43,6 +43,11 @@ Tips: - Use \`pick(arr, fields)\` to return only the fields you need — responses can be very large - For file uploads use \`formData\` with \`{ name, type, data }\` (base64) or \`{ name, type, url }\` (remote URL) +Binary responses (file assets — images, fonts, PDFs, etc.): +- Endpoints that return non-text bodies (e.g. GET \`/api/v2/assets/{identifier}\` and \`/dA/{id}\`, content-type \`application/octet-stream\` or \`image/*\`) come back as an envelope: \`{ __dotcmsBinary: true, contentType, base64, byteLength }\`. +- The \`base64\` field IS the raw file bytes — base64-decode it to recover the exact file. Do NOT treat it as text; the bytes are intact (not UTF-8-mangled). +- JSON and textual responses (\`text/*\`, xml, js, \`+json\`/\`+xml\`) are returned as parsed objects / strings as before — only binary bodies use the envelope. + Block Editor (Story Block) fields: - A Story Block field stores a string. When creating or updating content via a fire endpoint, send the field value as an **HTML or Markdown string** — do NOT hand-author the ProseMirror/JSON document. dotCMS stores it as-is and converts it to the Block Editor structure when the contentlet is opened in the editor. - Example: \`{ "contentType": "Blog", "title": "My Post", "body": "

<h2>Intro</h2><p>Hello world.</p>

" }\` — where \`body\` is the Story Block field. From 92fca069c8e84d4b668ae7f9f7797306c8d31959 Mon Sep 17 00:00:00 2001 From: Freddy Montes Date: Fri, 19 Jun 2026 07:35:06 -0600 Subject: [PATCH 3/5] fix(agentic-tools): harden binary envelope guard and size cap Address PR review: - isBinaryResponseEnvelope now also validates contentType (string) and byteLength (number), so a partial object can't pass the guard. - readBinaryResponse rejects early via Content-Length before buffering the body, so an oversized response can't OOM before the cap fires; the post-read check stays as the backstop for absent/lying headers. - Tests for both: oversized Content-Length rejection (asserts the body is never buffered) and malformed-envelope guard cases. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../agentic-tools/src/lib/http-client.spec.ts | 70 +++++++++++++++++-- .../libs/agentic-tools/src/lib/http-client.ts | 21 ++++-- 2 files changed, 83 insertions(+), 8 deletions(-) diff --git a/core-web/libs/agentic-tools/src/lib/http-client.spec.ts b/core-web/libs/agentic-tools/src/lib/http-client.spec.ts index b71cb21af5d8..89360f987dc8 100644 --- a/core-web/libs/agentic-tools/src/lib/http-client.spec.ts +++ b/core-web/libs/agentic-tools/src/lib/http-client.spec.ts @@ -24,20 +24,26 @@ function getRequestMethod(adapter: Adapter): AdapterMethod { /** Build a Response-like stub backed by a fixed body buffer. */ function makeResponse( body: Buffer | string, - { contentType, ok = true, status = 200, statusText = 'OK' }: { + { contentType, ok = true, status = 200, statusText = 'OK', contentLength }: { contentType: string; ok?: boolean; status?: number; statusText?: string; + // Override the Content-Length header independently of the actual body — + // lets us simulate a server that advertises an oversized response. + contentLength?: string; } ): Response { - const buffer = - typeof body === 'string' ? Buffer.from(body, 'utf-8') : body; + const buffer = typeof body === 'string' ? 
Buffer.from(body, 'utf-8') : body; + const headerValues: Record = { + 'content-type': contentType, + 'content-length': contentLength ?? String(buffer.byteLength) + }; return { ok, status, statusText, - headers: { get: (name: string) => (name.toLowerCase() === 'content-type' ? contentType : null) }, + headers: { get: (name: string) => headerValues[name.toLowerCase()] ?? null }, json: async () => JSON.parse(buffer.toString('utf-8')), text: async () => buffer.toString('utf-8'), arrayBuffer: async () => @@ -134,4 +140,60 @@ describe('createApiAdapter response parsing', () => { 'HTTP 404 Not Found: Not Found' ); }); + + it('rejects an oversized binary response via Content-Length before buffering', async () => { + const oversized = String(26 * 1024 * 1024); // 26MB > 25MB cap + const arrayBuffer = jest.fn(); + fetchMock.mockResolvedValue({ + ok: true, + status: 200, + statusText: 'OK', + headers: { + get: (name: string) => + name.toLowerCase() === 'content-type' + ? 'application/octet-stream' + : name.toLowerCase() === 'content-length' + ? oversized + : null + }, + arrayBuffer + } as unknown as Response); + + const adapter = createApiAdapter(CONFIG); + await expect(getRequestMethod(adapter).execute({ path: '/dA/huge' })).rejects.toThrow( + 'exceeds the' + ); + // The body must never be buffered when Content-Length already exceeds the cap. 
+ expect(arrayBuffer).not.toHaveBeenCalled(); + }); + + describe('isBinaryResponseEnvelope', () => { + it('accepts a fully-formed envelope', () => { + expect( + isBinaryResponseEnvelope({ + __dotcmsBinary: true, + contentType: 'image/png', + base64: 'AA==', + byteLength: 1 + }) + ).toBe(true); + }); + + it('rejects an envelope missing contentType or byteLength', () => { + expect(isBinaryResponseEnvelope({ __dotcmsBinary: true, base64: 'AA==' })).toBe(false); + expect( + isBinaryResponseEnvelope({ + __dotcmsBinary: true, + base64: 'AA==', + contentType: 'image/png' + }) + ).toBe(false); + }); + + it('rejects non-envelope values', () => { + expect(isBinaryResponseEnvelope(null)).toBe(false); + expect(isBinaryResponseEnvelope('string')).toBe(false); + expect(isBinaryResponseEnvelope({ hello: 'world' })).toBe(false); + }); + }); }); diff --git a/core-web/libs/agentic-tools/src/lib/http-client.ts b/core-web/libs/agentic-tools/src/lib/http-client.ts index ad5b2e7cce62..311f8c2140bb 100644 --- a/core-web/libs/agentic-tools/src/lib/http-client.ts +++ b/core-web/libs/agentic-tools/src/lib/http-client.ts @@ -65,11 +65,15 @@ export interface BinaryResponseEnvelope { * a binary body and `Buffer.from(envelope.base64, 'base64')` to recover the bytes. */ export function isBinaryResponseEnvelope(value: unknown): value is BinaryResponseEnvelope { + if (typeof value !== 'object' || value === null) { + return false; + } + const obj = value as Record; return ( - typeof value === 'object' && - value !== null && - (value as Record).__dotcmsBinary === true && - typeof (value as Record).base64 === 'string' + obj.__dotcmsBinary === true && + typeof obj.base64 === 'string' && + typeof obj.contentType === 'string' && + typeof obj.byteLength === 'number' ); } @@ -97,6 +101,15 @@ async function readBinaryResponse( response: Response, contentType: string ): Promise { + // Reject early via Content-Length so we never buffer an oversized body into + // memory. 
The header can be absent or lie, so the post-read check below stays + // as the authoritative backstop. + const declaredLength = Number(response.headers.get('content-length')); + if (Number.isFinite(declaredLength) && declaredLength > MAX_BINARY_RESPONSE_BYTES) { + throw new Error( + `Binary response (${declaredLength} bytes) exceeds the ${MAX_BINARY_RESPONSE_BYTES}-byte limit` + ); + } const buffer = await response.arrayBuffer(); if (buffer.byteLength > MAX_BINARY_RESPONSE_BYTES) { throw new Error( From 835a98749986d7e99e112fcf48429f077baec0c4 Mon Sep 17 00:00:00 2001 From: Freddy Montes Date: Fri, 19 Jun 2026 07:40:21 -0600 Subject: [PATCH 4/5] fix format --- core-web/libs/agentic-tools/src/lib/http-client.spec.ts | 8 +++++++- core-web/libs/agentic-tools/src/lib/http-client.ts | 4 +--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core-web/libs/agentic-tools/src/lib/http-client.spec.ts b/core-web/libs/agentic-tools/src/lib/http-client.spec.ts index 89360f987dc8..d655d578c537 100644 --- a/core-web/libs/agentic-tools/src/lib/http-client.spec.ts +++ b/core-web/libs/agentic-tools/src/lib/http-client.spec.ts @@ -24,7 +24,13 @@ function getRequestMethod(adapter: Adapter): AdapterMethod { /** Build a Response-like stub backed by a fixed body buffer. */ function makeResponse( body: Buffer | string, - { contentType, ok = true, status = 200, statusText = 'OK', contentLength }: { + { + contentType, + ok = true, + status = 200, + statusText = 'OK', + contentLength + }: { contentType: string; ok?: boolean; status?: number; diff --git a/core-web/libs/agentic-tools/src/lib/http-client.ts b/core-web/libs/agentic-tools/src/lib/http-client.ts index 311f8c2140bb..0366eab8b612 100644 --- a/core-web/libs/agentic-tools/src/lib/http-client.ts +++ b/core-web/libs/agentic-tools/src/lib/http-client.ts @@ -313,9 +313,7 @@ export function createApiAdapter(config: ApiAdapterConfig): Adapter { // readable message, not a base64 envelope of the error page. 
if (!response.ok) { const errorBody = await response.text(); - throw new Error( - `HTTP ${response.status} ${response.statusText}: ${errorBody}` - ); + throw new Error(`HTTP ${response.status} ${response.statusText}: ${errorBody}`); } const forceBinary = options.responseType === 'base64'; From c2ef44288f6de5663edfd565c4b647066f29b9af Mon Sep 17 00:00:00 2001 From: Freddy Montes Date: Fri, 19 Jun 2026 07:54:26 -0600 Subject: [PATCH 5/5] fix(agentic-tools): exclude spec files from library tsc build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The @nx/js:tsc build compiled src/**/*.spec.ts into the library because tsconfig.lib.json had no exclude — it lacked jest/test-runner types and broke `nx build agentic-tools`. This never surfaced before because the lib had no spec files until this PR added http-client.spec.ts. Exclude *.spec.ts / *.test.ts from the lib build (they still compile under tsconfig.spec.json for the test target). Co-Authored-By: Claude Opus 4.8 (1M context) --- core-web/libs/agentic-tools/tsconfig.lib.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core-web/libs/agentic-tools/tsconfig.lib.json b/core-web/libs/agentic-tools/tsconfig.lib.json index 9774c58377f8..bd3cb7eff516 100644 --- a/core-web/libs/agentic-tools/tsconfig.lib.json +++ b/core-web/libs/agentic-tools/tsconfig.lib.json @@ -6,5 +6,6 @@ "types": ["node"], "resolveJsonModule": true }, - "include": ["src/**/*.ts", "src/**/*.json"] + "include": ["src/**/*.ts", "src/**/*.json"], + "exclude": ["src/**/*.spec.ts", "src/**/*.test.ts"] }