diff --git a/src/const.ts b/src/const.ts index 0074f0e2..52b85cd0 100644 --- a/src/const.ts +++ b/src/const.ts @@ -22,6 +22,35 @@ export const KV_RECORD_MAX_INLINE_BYTES = 256 * 1024; */ export const DATASET_SIZE_HINT_BYTES = 50000; +/** + * Hard ceiling on the effective row count for `get-dataset-items`. The model freely requests far more + * (real traces show up to 1226 items, blowing the context budget); we clamp to this before fetching so + * an over-asking call is served a bounded first page instead of the whole dataset, then steered to + * paginate via `offset`. The cheap, pre-fetch first layer; {@link DATASET_ITEMS_MAX_BYTES} is the + * post-fetch backstop for the case where even this many items are individually huge. + */ +export const MAX_DATASET_ITEMS_LIMIT = 100; + +/** + * Hard cap on a single `get-dataset-items` response, measured on the TOON text in `content[0]` — which + * is exactly what the model receives. The chat wraps MCP tools with `schemas: "automatic"`, so the AI + * SDK serializes the result via `mcpToModelOutput`, forwarding only `content` (text/image) and dropping + * `structuredContent`. When exceeded, trailing items are dropped until the payload fits and the caller + * is told to page via `offset`. Mirrors {@link DATASET_SIZE_HINT_BYTES} (~25k-token budget) — the + * former soft hint becomes a real truncation cap. + */ +export const DATASET_ITEMS_MAX_BYTES = 50000; + +/** + * Inline cap for text/JSON key-value-store records. Larger values are truncated and a link to the full + * record is returned instead of inlining the whole value. Parallels {@link KV_RECORD_MAX_INLINE_BYTES} + * (the 256 KB binary cap); text/JSON was previously inlined uncapped. + */ +export const KV_RECORD_MAX_INLINE_TEXT_BYTES = 50 * 1024; + +/** Per-snippet `content` char cap for `search-apify-docs`; full page is fetched separately via `fetch-apify-docs`. 
*/ +export const DOCS_SNIPPET_MAX_LENGTH = 1000; + /** Shared steer appended to large-output hints so the model narrows instead of refetching everything. */ export const NARROW_OUTPUT_HINT = 'narrow with fields= or page with offset'; diff --git a/src/tools/common/get_dataset_items.ts b/src/tools/common/get_dataset_items.ts index 809e7ca6..092e4faa 100644 --- a/src/tools/common/get_dataset_items.ts +++ b/src/tools/common/get_dataset_items.ts @@ -1,11 +1,18 @@ import dedent from 'dedent'; import { z } from 'zod'; -import { HelperTools, HTTP_NOT_FOUND } from '../../const.js'; +import { + DATASET_ITEMS_MAX_BYTES, + HelperTools, + HTTP_NOT_FOUND, + MAX_DATASET_ITEMS_LIMIT, + NARROW_OUTPUT_HINT, +} from '../../const.js'; import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../../types.js'; import { TOOL_TYPE } from '../../types.js'; import { compileSchema } from '../../utils/ajv.js'; import { buildConsoleDatasetUrl, getConsoleLinkContext } from '../../utils/console_link.js'; +import { encodeToon } from '../../utils/encode_text.js'; import { parseCommaSeparatedList, stripQuoteWrappers } from '../../utils/generic.js'; import { getHttpStatusCode } from '../../utils/logging.js'; import { datasetItemsOutputSchema } from '../structured_output_schemas.js'; @@ -25,6 +32,30 @@ export function extractDotPrefixes(fields: string[]): string[] { return [...prefixes]; } +/** + * Largest prefix length of `items` whose encoded `structuredContent` stays within + * {@link DATASET_ITEMS_MAX_BYTES}. Measured on the TOON text (`content[0]`) — which is exactly what the + * model receives: the chat wraps MCP tools with `schemas: "automatic"`, so the AI SDK serializes the + * result via `mcpToModelOutput`, which forwards only `content` (text/image parts) and drops + * `structuredContent`. Binary-searches the prefix so encoding runs O(log n) times. 
/**
 * Number of leading `items` to keep so that the TOON encoding of
 * `buildStructuredContent(prefix)` stays within {@link DATASET_ITEMS_MAX_BYTES}.
 *
 * The size is measured with `Buffer.byteLength` on the `encodeToon` output of the structured
 * content built from the candidate prefix, i.e. on the text form of the payload rather than on the
 * raw items.
 *
 * The prefix length is found by binary search, so the payload is encoded O(log n) times instead of
 * once per item. This assumes the encoded size does not shrink when more items are added.
 *
 * @param items - Page of items, in the order they would be returned.
 * @param buildStructuredContent - Builds the response payload for a given prefix of `items`.
 * @returns `items.length` when the whole page fits (including `0` for an empty page); otherwise the
 * largest fitting prefix length, but never less than `1`, so a single oversized item is still
 * returned instead of an empty page. In that last case the result may exceed the byte cap.
 */
function maxItemsWithinByteCap<T>(
    items: T[],
    buildStructuredContent: (items: T[]) => Record<string, unknown>,
): number {
    const fitsWithinCap = (count: number): boolean =>
        Buffer.byteLength(encodeToon(buildStructuredContent(items.slice(0, count)))) <= DATASET_ITEMS_MAX_BYTES;

    if (items.length === 0 || fitsWithinCap(items.length)) return items.length;

    // The full page is already known to be too large, so search only the strictly shorter prefixes.
    let lo = 1; // keep at least one item even if it alone exceeds the cap
    let hi = items.length - 1;
    while (lo < hi) {
        // Round up so that `lo = mid` always makes progress.
        const mid = Math.ceil((lo + hi) / 2);
        if (fitsWithinCap(mid)) lo = mid;
        else hi = mid - 1;
    }
    return lo;
}
0; const apifyConsoleUrl = buildConsoleDatasetUrl(await getConsoleLinkContext(apifyToken, client), datasetId); - const structuredContent = { + const buildStructuredContent = (items: typeof v.items): Record => ({ datasetId, apifyConsoleUrl, - items: v.items, - itemCount: v.items.length, + items, + itemCount: items.length, totalItemCount: v.total, offset, limit: effectiveLimit, - }; + }); + + // Layer 2: byte-cap the encoded response — catches the case where even the clamped page is huge + // (a few large items). Drop trailing items until the on-the-wire payload fits; pagination serves + // the rest. `totalItemCount` stays the dataset total so the next-step offset math is exact. + const keep = maxItemsWithinByteCap(v.items, buildStructuredContent); + const items = keep < v.items.length ? v.items.slice(0, keep) : v.items; + const truncatedByBytes = keep < v.items.length; + const structuredContent = buildStructuredContent(items); const { summary, nextStep } = buildDatasetItemsSummaryNextStep({ datasetId, - itemCount: v.items.length, + // Use the actually-returned count, not the requested limit, so the next page resumes + // exactly where this one ended and no items are skipped. + itemCount: items.length, totalItemCount: v.total, offset, loadedToolNames: apifyMcpServer.listToolNames(), }); - return buildStorageResponse({ structuredContent, summary, nextStep, toon: true, apifyConsoleUrl }); + // When the byte cap (not just paging) forced the cut, steer the model to shrink per-item size so + // the next page can carry more rows instead of getting capped to the same small count again. + const cappedNextStep = truncatedByBytes + ? `Response capped at ${DATASET_ITEMS_MAX_BYTES} bytes (returned ${items.length} items). 
${nextStep} To fit more rows per page, ${NARROW_OUTPUT_HINT}.` + : nextStep; + return buildStorageResponse({ + structuredContent, + summary, + nextStep: cappedNextStep, + toon: true, + apifyConsoleUrl, + }); }, } as const); diff --git a/src/tools/common/get_key_value_store_record.ts b/src/tools/common/get_key_value_store_record.ts index f59c6755..4dde7d79 100644 --- a/src/tools/common/get_key_value_store_record.ts +++ b/src/tools/common/get_key_value_store_record.ts @@ -2,12 +2,12 @@ import type { AudioContent, EmbeddedResource, ImageContent, ResourceLink } from import dedent from 'dedent'; import { z } from 'zod'; -import { HelperTools, KV_RECORD_MAX_INLINE_BYTES } from '../../const.js'; +import { HelperTools, KV_RECORD_MAX_INLINE_BYTES, KV_RECORD_MAX_INLINE_TEXT_BYTES } from '../../const.js'; import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../../types.js'; import { TOOL_TYPE } from '../../types.js'; import { compileSchema } from '../../utils/ajv.js'; import { buildConsoleKeyValueStoreUrl, getConsoleLinkContext } from '../../utils/console_link.js'; -import { computeValueBytes, stripQuoteWrappers } from '../../utils/generic.js'; +import { computeValueBytes, stripQuoteWrappers, truncateToBytes } from '../../utils/generic.js'; import { keyValueStoreRecordOutputSchema } from '../structured_output_schemas.js'; import { buildConsoleLinkContent, @@ -138,6 +138,23 @@ export const getKeyValueStoreRecord: ToolEntry = Object.freeze({ ], }; } + // Large text/JSON values were previously inlined uncapped — a multi-MB record would blow the + // context window. Mirror the binary cap: truncate to a byte budget and point at the full record. + if (bytes !== undefined && bytes > KV_RECORD_MAX_INLINE_TEXT_BYTES) { + const uri = await store.getRecordPublicUrl(recordKey); + const asText = typeof value === 'string' ? 
value : JSON.stringify(value); + const preview = truncateToBytes(asText, KV_RECORD_MAX_INLINE_TEXT_BYTES); + const truncatedSummary = + `${summary} Value truncated to ${KV_RECORD_MAX_INLINE_TEXT_BYTES} bytes ` + + `(full size ${bytes} bytes); fetch the full record at ${uri}.`; + const structuredContent = { + keyValueStoreId, + key: record.key, + value: preview, + ...(contentType && { contentType }), + }; + return buildStorageResponse({ structuredContent, summary: truncatedSummary, apifyConsoleUrl }); + } // Text/JSON values serialize cleanly — return them as structuredContent per the storage-tool contract. return buildStorageResponse({ structuredContent: { keyValueStoreId, ...record }, summary, apifyConsoleUrl }); }, diff --git a/src/tools/common/search_apify_docs.ts b/src/tools/common/search_apify_docs.ts index 58736cf5..9c8f31ba 100644 --- a/src/tools/common/search_apify_docs.ts +++ b/src/tools/common/search_apify_docs.ts @@ -1,6 +1,6 @@ import { z } from 'zod'; -import { DOCS_SOURCES, HelperTools } from '../../const.js'; +import { DOCS_SNIPPET_MAX_LENGTH, DOCS_SOURCES, HelperTools } from '../../const.js'; import type { InternalToolArgs, ToolEntry, ToolInputSchema } from '../../types.js'; import { TOOL_TYPE } from '../../types.js'; import { compileSchema } from '../../utils/ajv.js'; @@ -41,6 +41,12 @@ Fetch the full content of the document using the ${HelperTools.DOCS_FETCH} tool ${PLATFORM_DOCS_PREFERENCE}`; } +/** Clip an Algolia snippet to {@link DOCS_SNIPPET_MAX_LENGTH} chars; full page is fetched via fetch-apify-docs. 
/**
 * Clip a search-result snippet to at most {@link DOCS_SNIPPET_MAX_LENGTH} UTF-16 code units and
 * append a notice naming the tool that fetches the full document.
 *
 * Snippets already within the limit are returned unchanged. The appended notice is not counted
 * against the limit, so a clipped result is longer than `DOCS_SNIPPET_MAX_LENGTH`.
 *
 * @param content - Raw snippet text.
 * @returns The original snippet, or its clipped prefix followed by the truncation notice.
 */
function clipSnippet(content: string): string {
    if (content.length <= DOCS_SNIPPET_MAX_LENGTH) return content;

    let end = DOCS_SNIPPET_MAX_LENGTH;
    // If the last kept code unit is a high surrogate, its low half would be cut off and leave an
    // unpaired surrogate behind; drop the whole character instead.
    const lastKept = content.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;

    return `${content.slice(0, end)}… (truncated; fetch the full doc with ${HelperTools.DOCS_FETCH})`;
}
/**
 * Truncate a string to at most `maxBytes` UTF-8 bytes without splitting a multi-byte code point.
 *
 * The cut position is moved back to the nearest code-point boundary by skipping UTF-8 continuation
 * bytes (`10xxxxxx`), so the result never depends on how the decoder renders a partial sequence and
 * a genuine U+FFFD character that fits in the budget is preserved.
 *
 * @param s - Text to truncate.
 * @param maxBytes - Byte budget. Fractional values are rounded down; zero, negative or `NaN`
 * budgets yield an empty string (unless `s` itself is empty).
 * @returns `s` unchanged when it already fits, otherwise the longest prefix of whole code points
 * whose UTF-8 encoding is at most `maxBytes` bytes.
 */
export function truncateToBytes(s: string, maxBytes: number): string {
    if (Buffer.byteLength(s, 'utf8') <= maxBytes) return s;
    // Also catches NaN; a negative end index would otherwise be interpreted relative to the end.
    if (!(maxBytes > 0)) return '';

    const bytes = Buffer.from(s, 'utf8');
    // `end` is the index of the first excluded byte; it is < bytes.length because s is over budget.
    let end = Math.floor(maxBytes);
    // If the first excluded byte is a continuation byte, the cut is mid-code-point: back up to the
    // lead byte of that code point so the whole character is excluded.
    while (end > 0 && (bytes.readUInt8(end) & 0xc0) === 0x80) end -= 1;
    return bytes.toString('utf8', 0, end);
}
*/ export function computeValueBytes(value: unknown): number | undefined { if (Buffer.isBuffer(value)) return value.length; diff --git a/tests/unit/tools.call_actor_common.test.ts b/tests/unit/tools.call_actor_common.test.ts index 8b4c59d7..de2610e0 100644 --- a/tests/unit/tools.call_actor_common.test.ts +++ b/tests/unit/tools.call_actor_common.test.ts @@ -1,15 +1,24 @@ import { ApifyApiError } from 'apify-client'; import type { AxiosResponse } from 'axios'; -import { describe, expect, it } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { APIFY_ERROR_TYPE_MEMORY_LIMIT_EXCEEDED, FAILURE_CATEGORY, HelperTools, TOOL_STATUS } from '../../src/const.js'; +import { getActorsAsTools } from '../../src/tools/core/actor_tools_factory.js'; import { buildCallActorAppsDescription, buildCallActorDescription, buildCallActorErrorResponse, buildPermissionApprovalResponse, callActorArgs, + resolveAndValidateActor, } from '../../src/tools/core/call_actor_common.js'; +import type { InternalToolArgs, ToolEntry } from '../../src/types.js'; +import { TOOL_TYPE } from '../../src/types.js'; + +vi.mock('../../src/tools/core/actor_tools_factory.js', () => ({ + getActorsAsTools: vi.fn(), + fixActorNameInputAndLog: vi.fn((actor: string) => actor), +})); describe('call_actor_common', () => { describe('buildCallActorDescription', () => { @@ -250,4 +259,62 @@ describe('call_actor_common', () => { expect(response.content[0]?.text).toContain('This Actor requires full access to your account'); }); }); + + describe('resolveAndValidateActor — input schema is not inlined in error responses', () => { + // A property description large enough that a full-schema dump would dominate the response. 
+ const HUGE_DESC = 'x'.repeat(30_000); + const bigSchema = { + type: 'object', + properties: { startUrls: { type: 'array', description: HUGE_DESC } }, + required: ['startUrls'], + }; + + function stubActor(valid: boolean): ToolEntry { + const ajvValidate = Object.assign(() => valid, { + errors: valid ? null : [{ message: "must have required property 'startUrls'" }], + }); + return { + type: TOOL_TYPE.ACTOR, + actorId: 'abc123', + inputSchema: bigSchema, + ajvValidate, + } as unknown as ToolEntry; + } + + const toolArgs = { apifyClient: {}, mcpSessionId: 's1' } as unknown as InternalToolArgs; + + function errorText(res: object): string { + const content = (res as { error?: { content?: { text?: string }[] } }).error?.content ?? []; + return content.map((c) => c.text ?? '').join('\n'); + } + + beforeEach(() => vi.mocked(getActorsAsTools).mockReset()); + + it('validation failure returns the AJV errors and a fetch-actor-details pointer, not the full schema', async () => { + vi.mocked(getActorsAsTools).mockResolvedValue({ tools: [stubActor(false)], errors: [] } as never); + + const res = await resolveAndValidateActor({ actorName: 'apify/x', input: { foo: 1 }, toolArgs }); + const text = errorText(res); + + expect(text).not.toContain(HUGE_DESC); + expect(text.length).toBeLessThan(2_000); + expect(text).toContain(HelperTools.ACTOR_GET_DETAILS); + expect(text).toContain("must have required property 'startUrls'"); + }); + + it('missing input returns a fetch-actor-details pointer, not the full schema', async () => { + vi.mocked(getActorsAsTools).mockResolvedValue({ tools: [stubActor(true)], errors: [] } as never); + + const res = await resolveAndValidateActor({ + actorName: 'apify/x', + input: undefined as never, + toolArgs, + }); + const text = errorText(res); + + expect(text).not.toContain(HUGE_DESC); + expect(text.length).toBeLessThan(2_000); + expect(text).toContain(HelperTools.ACTOR_GET_DETAILS); + }); + }); }); diff --git a/tests/unit/tools.get_dataset_items.test.ts 
b/tests/unit/tools.get_dataset_items.test.ts index 55e3d73d..48f70045 100644 --- a/tests/unit/tools.get_dataset_items.test.ts +++ b/tests/unit/tools.get_dataset_items.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it, vi } from 'vitest'; -import { HelperTools } from '../../src/const.js'; +import { DATASET_ITEMS_MAX_BYTES, HelperTools, MAX_DATASET_ITEMS_LIMIT } from '../../src/const.js'; import { extractDotPrefixes, getDatasetItems } from '../../src/tools/common/get_dataset_items.js'; import type { HelperTool, InternalToolArgs } from '../../src/types.js'; import { VERBATIM_LINKS_NUDGE } from '../../src/utils/console_link.js'; @@ -250,4 +250,66 @@ describe('get-dataset-items', () => { expect(decodeFencedToolText(content[0].text)).toEqual(data); expect(structuredContent).not.toHaveProperty('desc'); }); + + describe('response size caps', () => { + it('clamps an over-large requested limit to MAX_DATASET_ITEMS_LIMIT before fetching', async () => { + const listItemsSpy = vi.fn().mockResolvedValue({ items: [], total: 0 }); + const result = await (getDatasetItems as HelperTool).call( + stubToolCallContext({ datasetId: 'ds-1', limit: 1226 }, stubApifyClient(listItemsSpy)), + ); + const { structuredContent } = result as { structuredContent: Record }; + + expect(listItemsSpy).toHaveBeenCalledWith(expect.objectContaining({ limit: MAX_DATASET_ITEMS_LIMIT })); + expect(structuredContent.limit).toBe(MAX_DATASET_ITEMS_LIMIT); + }); + + it('does not clamp a limit already within MAX_DATASET_ITEMS_LIMIT', async () => { + const listItemsSpy = vi.fn().mockResolvedValue({ items: [], total: 0 }); + await (getDatasetItems as HelperTool).call( + stubToolCallContext({ datasetId: 'ds-1', limit: 10 }, stubApifyClient(listItemsSpy)), + ); + + expect(listItemsSpy).toHaveBeenCalledWith(expect.objectContaining({ limit: 10 })); + }); + + it('byte-caps the response: drops trailing items and steers to paginate from the returned count', async () => { + // Each item ~2 KB; a full clamped page (100) 
far exceeds the byte cap, forcing truncation. + const bigItems = Array.from({ length: MAX_DATASET_ITEMS_LIMIT }, (_, i) => ({ + i, + text: 'x'.repeat(2000), + })); + const result = await (getDatasetItems as HelperTool).call( + stubToolCallContext( + { datasetId: 'ds-1' }, + stubApifyClient(async () => ({ items: bigItems, total: 5000 })), + ), + ); + const { content, structuredContent } = result as TextToolResult & { + structuredContent: Record; + }; + + const itemCount = structuredContent.itemCount as number; + expect(itemCount).toBeGreaterThan(0); + expect(itemCount).toBeLessThan(bigItems.length); + expect((structuredContent.items as unknown[]).length).toBe(itemCount); + // The encoded payload (content[0]) stays within the cap. + expect(Buffer.byteLength(content[0].text)).toBeLessThanOrEqual(DATASET_ITEMS_MAX_BYTES); + // Next step resumes from the actually-returned count (not the requested limit) and notes the cap. + expect(structuredContent.nextStep).toContain(`offset=${itemCount}`); + expect(structuredContent.nextStep).toContain('capped'); + }); + + it('returns at least one item even when a single item exceeds the byte cap', async () => { + const hugeItem = { text: 'x'.repeat(DATASET_ITEMS_MAX_BYTES * 2) }; + const result = await (getDatasetItems as HelperTool).call( + stubToolCallContext( + { datasetId: 'ds-1' }, + stubApifyClient(async () => ({ items: [hugeItem], total: 1 })), + ), + ); + const { structuredContent } = result as { structuredContent: Record }; + + expect(structuredContent.itemCount).toBe(1); + }); + }); }); diff --git a/tests/unit/tools.get_key_value_store_record.test.ts b/tests/unit/tools.get_key_value_store_record.test.ts index 35bf39e5..d7d4a0fd 100644 --- a/tests/unit/tools.get_key_value_store_record.test.ts +++ b/tests/unit/tools.get_key_value_store_record.test.ts @@ -5,7 +5,7 @@ import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import { CallToolRequestSchema, ListToolsRequestSchema } from 
'@modelcontextprotocol/sdk/types.js'; import { describe, expect, it, vi } from 'vitest'; -import { HelperTools, KV_RECORD_MAX_INLINE_BYTES } from '../../src/const.js'; +import { HelperTools, KV_RECORD_MAX_INLINE_BYTES, KV_RECORD_MAX_INLINE_TEXT_BYTES } from '../../src/const.js'; import { getKeyValueStoreRecord } from '../../src/tools/common/get_key_value_store_record.js'; import { keyValueStoreRecordOutputSchema } from '../../src/tools/structured_output_schemas.js'; import type { HelperTool, InternalToolArgs } from '../../src/types.js'; @@ -222,6 +222,60 @@ describe('get-key-value-store-record', () => { }); }); + it('truncates an over-cap text value and points at the full record', async () => { + const big = 'a'.repeat(KV_RECORD_MAX_INLINE_TEXT_BYTES + 10_000); + const onGetRecordPublicUrl = vi.fn(); + const result = await (getKeyValueStoreRecord as HelperTool).call( + stubToolCallContext( + { keyValueStoreId: 'kv-1', recordKey: 'big.txt' }, + stubApifyClient({ + record: { key: 'big.txt', value: big, contentType: 'text/plain' }, + onGetRecordPublicUrl, + }), + ), + ); + const { isError, structuredContent } = result as TextToolResult & { + structuredContent: Record; + }; + + expect(isError).not.toBe(true); + expect(onGetRecordPublicUrl).toHaveBeenCalled(); + expect(Buffer.byteLength(structuredContent.value as string)).toBeLessThanOrEqual( + KV_RECORD_MAX_INLINE_TEXT_BYTES, + ); + expect(structuredContent.summary).toContain('truncated'); + expect(structuredContent.summary).toContain('signature=signed'); + // Still schema-conforming (value is a string, all required fields present). 
+ expectSchemaConformingStructuredContent(result); + }); + + it('truncates an over-cap JSON value (serialized) and keeps it schema-conforming', async () => { + const bigJson = { blob: 'b'.repeat(KV_RECORD_MAX_INLINE_TEXT_BYTES + 10_000) }; + const result = await (getKeyValueStoreRecord as HelperTool).call( + stubToolCallContext( + { keyValueStoreId: 'kv-1', recordKey: 'big.json' }, + stubApifyClient({ record: { key: 'big.json', value: bigJson, contentType: 'application/json' } }), + ), + ); + const { structuredContent } = result as { structuredContent: Record }; + + expect(Buffer.byteLength(structuredContent.value as string)).toBeLessThanOrEqual( + KV_RECORD_MAX_INLINE_TEXT_BYTES, + ); + expectSchemaConformingStructuredContent(result); + }); + + it('does not truncate a text value within the cap', async () => { + const record = { key: 'small.txt', value: 'short value', contentType: 'text/plain' }; + const result = await (getKeyValueStoreRecord as HelperTool).call( + stubToolCallContext({ keyValueStoreId: 'kv-1', recordKey: 'small.txt' }, stubApifyClient({ record })), + ); + const { structuredContent } = result as { structuredContent: Record }; + + expect(structuredContent.value).toBe('short value'); + expect(structuredContent.summary).not.toContain('truncated'); + }); + it('returns isError "record not found" when getRecord is undefined but the store exists', async () => { const result = await (getKeyValueStoreRecord as HelperTool).call( stubToolCallContext( diff --git a/tests/unit/tools.search_apify_docs.test.ts b/tests/unit/tools.search_apify_docs.test.ts new file mode 100644 index 00000000..79b544cf --- /dev/null +++ b/tests/unit/tools.search_apify_docs.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it, vi } from 'vitest'; + +import { DOCS_SNIPPET_MAX_LENGTH, HelperTools } from '../../src/const.js'; +import { searchApifyDocsTool } from '../../src/tools/common/search_apify_docs.js'; +import type { HelperTool } from '../../src/types.js'; +import { 
searchDocsBySourceCached } from '../../src/utils/apify_docs.js'; +import { stubToolCallContext, type TextToolResult } from './helpers/tool_context.js'; + +vi.mock('../../src/utils/apify_docs.js', () => ({ + searchDocsBySourceCached: vi.fn(), +})); + +describe('search-apify-docs snippet cap', () => { + it('clips an over-long snippet in both the text block and structuredContent', async () => { + const longContent = 'c'.repeat(DOCS_SNIPPET_MAX_LENGTH + 2000); + vi.mocked(searchDocsBySourceCached).mockResolvedValue([ + { url: 'https://docs.apify.com/platform/x', content: longContent }, + ]); + + const result = await (searchApifyDocsTool as HelperTool).call( + stubToolCallContext({ docSource: 'apify', query: 'standby actor' }, {} as never), + ); + const { content, structuredContent } = result as TextToolResult & { + structuredContent: { results: { url: string; content?: string }[] }; + }; + + const snippet = structuredContent.results[0].content as string; + expect(snippet.length).toBeLessThan(longContent.length); + expect(snippet).toContain(HelperTools.DOCS_FETCH); + expect(snippet.startsWith('c'.repeat(DOCS_SNIPPET_MAX_LENGTH))).toBe(true); + // The full untruncated content never reaches the text channel either. + expect(content[0].text).not.toContain(longContent); + }); + + it('leaves a snippet within the cap untouched', async () => { + const shortContent = 'short snippet'; + vi.mocked(searchDocsBySourceCached).mockResolvedValue([ + { url: 'https://docs.apify.com/platform/y', content: shortContent }, + ]); + + const result = await (searchApifyDocsTool as HelperTool).call( + stubToolCallContext({ docSource: 'apify', query: 'whatever' }, {} as never), + ); + const { structuredContent } = result as { + structuredContent: { results: { content?: string }[] }; + }; + + expect(structuredContent.results[0].content).toBe(shortContent); + }); +});